treetagger-ruby 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.rdoc +10 -11
- data/bin/rtt +9 -8
- data/lib/tree_tagger.rb +2 -0
- data/lib/tree_tagger/error.rb +3 -7
- data/lib/tree_tagger/tagger.rb +1 -1
- data/lib/tree_tagger/version.rb +1 -1
- data/lib/treetagger.rb +1 -0
- metadata +80 -96
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8519196f4c23977ddf07e1d1b6952783a70f6541
|
4
|
+
data.tar.gz: 2681f5fe030a2018088633a95089a30a219ca43e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3f6c7ba1755be3efbc472b280c7c59f01020b22256de72fbff9cd34d7f383ead4ee4d4eb7e816c31ac53e72d40547323bf6bf0ba3859a450e19f385c115d03f8
|
7
|
+
data.tar.gz: d655886a3a6ae267992fdfbdb10d918207f2fc13cb986d0459b55d59cc8b5a5d35d29dc1b73c0b1bc2891bea024925d5cb0801b74dc860a2da7c52f0df0cc7c1
|
data/README.rdoc
CHANGED
@@ -1,23 +1,22 @@
|
|
1
1
|
= TreeTagger for Ruby
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
3
|
+
{RubyGems}[http://rubygems.org/gems/treetagger-ruby] | {RTT Project Page}[http://bu.chsta.be/projects/treetagger-ruby/] |
|
4
|
+
{Source Code}[https://github.com/arbox/treetagger-ruby] | {Bug Tracker}[https://github.com/arbox/treetagger-ruby/issues]
|
5
|
+
|
6
|
+
{<img src="https://badge.fury.io/rb/treetagger-ruby.png" alt="Gem Version" />}[http://badge.fury.io/rb/treetagger-ruby]
|
7
|
+
{<img src="https://travis-ci.org/arbox/treetagger-ruby.png" alt="Build Status" />}[https://travis-ci.org/arbox/treetagger-ruby]
|
8
|
+
{<img src="https://codeclimate.com/github/arbox/treetagger-ruby.png" alt="Code Climate" />}[https://codeclimate.com/github/arbox/treetagger-ruby]
|
8
9
|
|
9
10
|
== DESCRIPTION
|
10
11
|
A Ruby based wrapper for the TreeTagger by Helmut Schmid.
|
11
12
|
|
12
|
-
Check it out if you are interested in Natural Language Processing (NLP)
|
13
|
-
and/or Human Language Technology (HLT).
|
13
|
+
Check it out if you are interested in Natural Language Processing (NLP) and/or Human Language Technology (HLT).
|
14
14
|
|
15
15
|
This library provides comprehensive bindings for the
|
16
|
-
{TreeTagger}[http://www.
|
16
|
+
{TreeTagger}[http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/],
|
17
17
|
a statistical, language-independent POS tagging and chunking software.
|
18
18
|
|
19
|
-
TreeTagger is language agnostic, it will never guess what language you're going
|
20
|
-
to use. It
|
19
|
+
TreeTagger is language agnostic, it will never guess what language you're going to use.
|
21
20
|
|
22
21
|
TODO:
|
23
22
|
* References to Schmid's publications;
|
@@ -75,7 +74,7 @@ Alternatively use your Gemfile for dependency management.
|
|
75
74
|
== SYNOPSIS
|
76
75
|
=== Basic Usage
|
77
76
|
Basic usage is very simple:
|
78
|
-
$ require 'treetagger
|
77
|
+
$ require 'treetagger'
|
79
78
|
$ # Instantiate a tagger instance with default values.
|
80
79
|
$ tagger = TreeTagger::Tagger.new
|
81
80
|
$ # Process an array of tokens.
|
data/bin/rtt
CHANGED
@@ -10,10 +10,10 @@ tagger = TreeTagger::Tagger.new(options)
|
|
10
10
|
|
11
11
|
# Adding some colors to the output.
|
12
12
|
# Using ANSI escape codes.
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
RED = "\e[31m"
|
14
|
+
GREEN = "\e[32m"
|
15
|
+
BLUE = "\e[34m"
|
16
|
+
RESET = "\e[0m"
|
17
17
|
|
18
18
|
reader = Thread.new do
|
19
19
|
beginning = true
|
@@ -34,20 +34,21 @@ reader = Thread.new do
|
|
34
34
|
tuple = tuple.split("\t")
|
35
35
|
|
36
36
|
if $stdout.tty?
|
37
|
-
tuple[0].insert(0,
|
38
|
-
tuple[1].insert(0,
|
39
|
-
tuple[2].insert(0,
|
37
|
+
tuple[0].insert(0, RED).insert(-1, RESET) if tuple[0]
|
38
|
+
tuple[1].insert(0, GREEN).insert(-1, RESET) if tuple[1]
|
39
|
+
tuple[2].insert(0, BLUE).insert(-1, RESET) if tuple[2]
|
40
40
|
end
|
41
41
|
|
42
42
|
# [['token', 'tag', 'lemma'], ['token', 'tag', 'lemma']]`
|
43
43
|
$stdout.puts tuple.join("\t")
|
44
44
|
end
|
45
|
-
end
|
45
|
+
end # loop end
|
46
46
|
end
|
47
47
|
|
48
48
|
# Read all lines from STDOUT or from files.
|
49
49
|
while line = ARGF.gets
|
50
50
|
# Invoke tokenizer somehow here.
|
51
|
+
puts line
|
51
52
|
tagger.process(line)
|
52
53
|
end
|
53
54
|
|
data/lib/tree_tagger.rb
ADDED
data/lib/tree_tagger/error.rb
CHANGED
@@ -4,16 +4,12 @@ module TreeTagger
|
|
4
4
|
class Error < StandardError; end
|
5
5
|
|
6
6
|
# Something went wrong: no env variable, data not coded properly, etc.
|
7
|
-
|
8
|
-
end
|
7
|
+
ExternalError = Class.new(Error)
|
9
8
|
|
10
9
|
# Execution error, an assert-like exception.
|
11
|
-
|
12
|
-
|
13
|
-
end
|
10
|
+
RuntimeError = Class.new(Error)
|
14
11
|
|
15
12
|
# User tries to use the lib in a wrong manner, e.g. provides
|
16
13
|
# wrong parameters.
|
17
|
-
|
18
|
-
end
|
14
|
+
UserError = Class.new(Error)
|
19
15
|
end
|
data/lib/tree_tagger/tagger.rb
CHANGED
@@ -25,7 +25,7 @@ module TreeTagger
|
|
25
25
|
# The flushing sentence can be shortened down to this size.
|
26
26
|
FLUSH_SENTENCE = "Das\nist\nein\nTestsatz\n,\num\ndas\nStossen\nder\nDaten\nsicherzustellen\n."
|
27
27
|
|
28
|
-
# Initializer
|
28
|
+
# Initializer comment
|
29
29
|
def initialize(opts = {
|
30
30
|
:binary => nil,
|
31
31
|
:model => nil,
|
data/lib/tree_tagger/version.rb
CHANGED
data/lib/treetagger.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'tree_tagger'
|
metadata
CHANGED
@@ -1,148 +1,132 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: treetagger-ruby
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 1
|
9
|
-
- 0
|
10
|
-
version: 0.1.0
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
11
5
|
platform: ruby
|
12
|
-
authors:
|
6
|
+
authors:
|
13
7
|
- Andrei Beliankou
|
14
8
|
autorequire:
|
15
9
|
bindir: bin
|
16
10
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
11
|
+
date: 2014-12-09 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
21
14
|
name: rdoc
|
22
|
-
|
23
|
-
|
24
|
-
none: false
|
25
|
-
requirements:
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
26
17
|
- - ">="
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
hash: 33
|
29
|
-
segments:
|
30
|
-
- 3
|
31
|
-
- 9
|
32
|
-
- 1
|
18
|
+
- !ruby/object:Gem::Version
|
33
19
|
version: 3.9.1
|
34
20
|
type: :development
|
35
|
-
version_requirements: *id001
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
|
-
name: bundler
|
38
21
|
prerelease: false
|
39
|
-
|
40
|
-
|
41
|
-
requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
42
24
|
- - ">="
|
43
|
-
- !ruby/object:Gem::Version
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 3.9.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
48
34
|
type: :development
|
49
|
-
version_requirements: *id002
|
50
|
-
- !ruby/object:Gem::Dependency
|
51
|
-
name: yard
|
52
35
|
prerelease: false
|
53
|
-
|
54
|
-
|
55
|
-
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: yard
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
56
45
|
- - ">="
|
57
|
-
- !ruby/object:Gem::Version
|
58
|
-
|
59
|
-
segments:
|
60
|
-
- 0
|
61
|
-
version: "0"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
62
48
|
type: :development
|
63
|
-
version_requirements: *id003
|
64
|
-
- !ruby/object:Gem::Dependency
|
65
|
-
name: rake
|
66
49
|
prerelease: false
|
67
|
-
|
68
|
-
|
69
|
-
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
70
59
|
- - ">="
|
71
|
-
- !ruby/object:Gem::Version
|
72
|
-
|
73
|
-
segments:
|
74
|
-
- 0
|
75
|
-
version: "0"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
76
62
|
type: :development
|
77
|
-
|
78
|
-
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: This package contains a simple wrapper for the TreeTagger, a POS tagger
|
70
|
+
based on decision trees and developed by Helmut Schmid at IMS in Stuttgart, Germany.
|
71
|
+
You should have the TreeTagger with all library files installed on your machine
|
72
|
+
in order to use this wrapper.
|
79
73
|
email: a.belenkow@uni-trier.de
|
80
|
-
executables:
|
74
|
+
executables:
|
81
75
|
- rtt
|
82
76
|
extensions: []
|
83
|
-
|
84
|
-
extra_rdoc_files:
|
77
|
+
extra_rdoc_files:
|
85
78
|
- README.rdoc
|
86
79
|
- LICENCE.rdoc
|
87
80
|
- CHANGELOG.rdoc
|
88
|
-
files:
|
81
|
+
files:
|
82
|
+
- ".yardopts"
|
83
|
+
- CHANGELOG.rdoc
|
84
|
+
- LICENCE.rdoc
|
85
|
+
- README.rdoc
|
86
|
+
- bin/rtt
|
87
|
+
- lib/tree_tagger.rb
|
88
|
+
- lib/tree_tagger/argv_parser.rb
|
89
89
|
- lib/tree_tagger/chunker.rb
|
90
90
|
- lib/tree_tagger/error.rb
|
91
|
-
- lib/tree_tagger/argv_parser.rb
|
92
91
|
- lib/tree_tagger/tagger.rb
|
93
92
|
- lib/tree_tagger/version.rb
|
94
|
-
-
|
95
|
-
- LICENCE.rdoc
|
96
|
-
- CHANGELOG.rdoc
|
97
|
-
- .yardopts
|
93
|
+
- lib/treetagger.rb
|
98
94
|
- test/test_tagger.rb
|
99
95
|
- test/tree-tagger/corrupted_lexicon_file.txt
|
100
|
-
- test/tree-tagger/lexicon_file.txt
|
101
96
|
- test/tree-tagger/corrupted_model_file.par
|
97
|
+
- test/tree-tagger/lexicon_file.txt
|
102
98
|
- test/tree-tagger/model_file.par
|
103
99
|
- test/tree-tagger/tree-tagger
|
104
|
-
- bin/rtt
|
105
100
|
homepage: http://www.uni-trier.de/index.php?id=34451
|
106
101
|
licenses: []
|
107
|
-
|
102
|
+
metadata: {}
|
108
103
|
post_install_message:
|
109
|
-
rdoc_options:
|
110
|
-
- -m
|
104
|
+
rdoc_options:
|
105
|
+
- "-m"
|
111
106
|
- README.rdoc
|
112
|
-
require_paths:
|
107
|
+
require_paths:
|
113
108
|
- lib
|
114
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
115
|
-
|
116
|
-
requirements:
|
109
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
110
|
+
requirements:
|
117
111
|
- - ">="
|
118
|
-
- !ruby/object:Gem::Version
|
119
|
-
hash: 57
|
120
|
-
segments:
|
121
|
-
- 1
|
122
|
-
- 8
|
123
|
-
- 7
|
112
|
+
- !ruby/object:Gem::Version
|
124
113
|
version: 1.8.7
|
125
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
126
|
-
|
127
|
-
requirements:
|
114
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
115
|
+
requirements:
|
128
116
|
- - ">="
|
129
|
-
- !ruby/object:Gem::Version
|
130
|
-
|
131
|
-
segments:
|
132
|
-
- 0
|
133
|
-
version: "0"
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: '0'
|
134
119
|
requirements: []
|
135
|
-
|
136
120
|
rubyforge_project:
|
137
|
-
rubygems_version:
|
121
|
+
rubygems_version: 2.2.2
|
138
122
|
signing_key:
|
139
|
-
specification_version:
|
123
|
+
specification_version: 4
|
140
124
|
summary: A wrapper for the TreeTagger by Helmut Schmid.
|
141
|
-
test_files:
|
125
|
+
test_files:
|
142
126
|
- test/test_tagger.rb
|
143
127
|
- test/tree-tagger/corrupted_lexicon_file.txt
|
144
|
-
- test/tree-tagger/lexicon_file.txt
|
145
128
|
- test/tree-tagger/corrupted_model_file.par
|
129
|
+
- test/tree-tagger/lexicon_file.txt
|
146
130
|
- test/tree-tagger/model_file.par
|
147
131
|
- test/tree-tagger/tree-tagger
|
148
132
|
has_rdoc:
|