treetagger-ruby 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.rdoc +10 -11
- data/bin/rtt +9 -8
- data/lib/tree_tagger.rb +2 -0
- data/lib/tree_tagger/error.rb +3 -7
- data/lib/tree_tagger/tagger.rb +1 -1
- data/lib/tree_tagger/version.rb +1 -1
- data/lib/treetagger.rb +1 -0
- metadata +80 -96
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8519196f4c23977ddf07e1d1b6952783a70f6541
|
4
|
+
data.tar.gz: 2681f5fe030a2018088633a95089a30a219ca43e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3f6c7ba1755be3efbc472b280c7c59f01020b22256de72fbff9cd34d7f383ead4ee4d4eb7e816c31ac53e72d40547323bf6bf0ba3859a450e19f385c115d03f8
|
7
|
+
data.tar.gz: d655886a3a6ae267992fdfbdb10d918207f2fc13cb986d0459b55d59cc8b5a5d35d29dc1b73c0b1bc2891bea024925d5cb0801b74dc860a2da7c52f0df0cc7c1
|
data/README.rdoc
CHANGED
@@ -1,23 +1,22 @@
|
|
1
1
|
= TreeTagger for Ruby
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
3
|
+
{RubyGems}[http://rubygems.org/gems/treetagger-ruby] | {RTT Project Page}[http://bu.chsta.be/projects/treetagger-ruby/] |
|
4
|
+
{Source Code}[https://github.com/arbox/treetagger-ruby] | {Bug Tracker}[https://github.com/arbox/treetagger-ruby/issues]
|
5
|
+
|
6
|
+
{<img src="https://badge.fury.io/rb/treetagger-ruby.png" alt="Gem Version" />}[http://badge.fury.io/rb/treetagger-ruby]
|
7
|
+
{<img src="https://travis-ci.org/arbox/treetagger-ruby.png" alt="Build Status" />}[https://travis-ci.org/arbox/treetagger-ruby]
|
8
|
+
{<img src="https://codeclimate.com/github/arbox/treetagger-ruby.png" alt="Code Climate" />}[https://codeclimate.com/github/arbox/treetagger-ruby]
|
8
9
|
|
9
10
|
== DESCRIPTION
|
10
11
|
A Ruby based wrapper for the TreeTagger by Helmut Schmid.
|
11
12
|
|
12
|
-
Check it out if you are interested in Natural Language Processing (NLP)
|
13
|
-
and/or Human Language Technology (HLT).
|
13
|
+
Check it out if you are interested in Natural Language Processing (NLP) and/or Human Language Technology (HLT).
|
14
14
|
|
15
15
|
This library provides comprehensive bindings for the
|
16
|
-
{TreeTagger}[http://www.
|
16
|
+
{TreeTagger}[http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/],
|
17
17
|
a statistical, language-independent POS tagging and chunking software.
|
18
18
|
|
19
|
-
TreeTagger is language agnostic, it will never guess what language you're going
|
20
|
-
to use. It
|
19
|
+
TreeTagger is language agnostic, it will never guess what language you're going to use.
|
21
20
|
|
22
21
|
TODO:
|
23
22
|
* References to Schmid's publications;
|
@@ -75,7 +74,7 @@ Alternatively use your Gemfile for dependency management.
|
|
75
74
|
== SYNOPSIS
|
76
75
|
=== Basic Usage
|
77
76
|
Basic usage is very simple:
|
78
|
-
$ require 'treetagger
|
77
|
+
$ require 'treetagger'
|
79
78
|
$ # Instantiate a tagger instance with default values.
|
80
79
|
$ tagger = TreeTagger::Tagger.new
|
81
80
|
$ # Process an array of tokens.
|
data/bin/rtt
CHANGED
@@ -10,10 +10,10 @@ tagger = TreeTagger::Tagger.new(options)
|
|
10
10
|
|
11
11
|
# Adding some colors to the output.
|
12
12
|
# Using ANSI escape codes.
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
RED = "\e[31m"
|
14
|
+
GREEN = "\e[32m"
|
15
|
+
BLUE = "\e[34m"
|
16
|
+
RESET = "\e[0m"
|
17
17
|
|
18
18
|
reader = Thread.new do
|
19
19
|
beginning = true
|
@@ -34,20 +34,21 @@ reader = Thread.new do
|
|
34
34
|
tuple = tuple.split("\t")
|
35
35
|
|
36
36
|
if $stdout.tty?
|
37
|
-
tuple[0].insert(0,
|
38
|
-
tuple[1].insert(0,
|
39
|
-
tuple[2].insert(0,
|
37
|
+
tuple[0].insert(0, RED).insert(-1, RESET) if tuple[0]
|
38
|
+
tuple[1].insert(0, GREEN).insert(-1, RESET) if tuple[1]
|
39
|
+
tuple[2].insert(0, BLUE).insert(-1, RESET) if tuple[2]
|
40
40
|
end
|
41
41
|
|
42
42
|
# [['token', 'tag', 'lemma'], ['token', 'tag', 'lemma']]
|
43
43
|
$stdout.puts tuple.join("\t")
|
44
44
|
end
|
45
|
-
end
|
45
|
+
end # loop end
|
46
46
|
end
|
47
47
|
|
48
48
|
# Read all lines from STDIN or from files.
|
49
49
|
while line = ARGF.gets
|
50
50
|
# Invoke tokenizer somehow here.
|
51
|
+
puts line
|
51
52
|
tagger.process(line)
|
52
53
|
end
|
53
54
|
|
data/lib/tree_tagger.rb
ADDED
data/lib/tree_tagger/error.rb
CHANGED
@@ -4,16 +4,12 @@ module TreeTagger
|
|
4
4
|
class Error < StandardError; end
|
5
5
|
|
6
6
|
# Something went wrong: no env variable, data not coded properly etc.
|
7
|
-
|
8
|
-
end
|
7
|
+
ExternalError = Class.new(Error)
|
9
8
|
|
10
9
|
# Execution error, an assert-like exception.
|
11
|
-
|
12
|
-
|
13
|
-
end
|
10
|
+
RuntimeError = Class.new(Error)
|
14
11
|
|
15
12
|
# User tries to use the lib in a wrong manner, e.g. provides
|
16
13
|
# wrong parameters.
|
17
|
-
|
18
|
-
end
|
14
|
+
UserError = Class.new(Error)
|
19
15
|
end
|
data/lib/tree_tagger/tagger.rb
CHANGED
@@ -25,7 +25,7 @@ module TreeTagger
|
|
25
25
|
# The flushing sentence can be shortened down to this size.
|
26
26
|
FLUSH_SENTENCE = "Das\nist\nein\nTestsatz\n,\num\ndas\nStossen\nder\nDaten\nsicherzustellen\n."
|
27
27
|
|
28
|
-
# Initializer
|
28
|
+
# Initializer comment
|
29
29
|
def initialize(opts = {
|
30
30
|
:binary => nil,
|
31
31
|
:model => nil,
|
data/lib/tree_tagger/version.rb
CHANGED
data/lib/treetagger.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'tree_tagger'
|
metadata
CHANGED
@@ -1,148 +1,132 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: treetagger-ruby
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 1
|
9
|
-
- 0
|
10
|
-
version: 0.1.0
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
11
5
|
platform: ruby
|
12
|
-
authors:
|
6
|
+
authors:
|
13
7
|
- Andrei Beliankou
|
14
8
|
autorequire:
|
15
9
|
bindir: bin
|
16
10
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
11
|
+
date: 2014-12-09 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
21
14
|
name: rdoc
|
22
|
-
|
23
|
-
|
24
|
-
none: false
|
25
|
-
requirements:
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
26
17
|
- - ">="
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
hash: 33
|
29
|
-
segments:
|
30
|
-
- 3
|
31
|
-
- 9
|
32
|
-
- 1
|
18
|
+
- !ruby/object:Gem::Version
|
33
19
|
version: 3.9.1
|
34
20
|
type: :development
|
35
|
-
version_requirements: *id001
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
|
-
name: bundler
|
38
21
|
prerelease: false
|
39
|
-
|
40
|
-
|
41
|
-
requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
42
24
|
- - ">="
|
43
|
-
- !ruby/object:Gem::Version
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 3.9.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
48
34
|
type: :development
|
49
|
-
version_requirements: *id002
|
50
|
-
- !ruby/object:Gem::Dependency
|
51
|
-
name: yard
|
52
35
|
prerelease: false
|
53
|
-
|
54
|
-
|
55
|
-
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: yard
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
56
45
|
- - ">="
|
57
|
-
- !ruby/object:Gem::Version
|
58
|
-
|
59
|
-
segments:
|
60
|
-
- 0
|
61
|
-
version: "0"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
62
48
|
type: :development
|
63
|
-
version_requirements: *id003
|
64
|
-
- !ruby/object:Gem::Dependency
|
65
|
-
name: rake
|
66
49
|
prerelease: false
|
67
|
-
|
68
|
-
|
69
|
-
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
70
59
|
- - ">="
|
71
|
-
- !ruby/object:Gem::Version
|
72
|
-
|
73
|
-
segments:
|
74
|
-
- 0
|
75
|
-
version: "0"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
76
62
|
type: :development
|
77
|
-
|
78
|
-
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: This package contains a simple wrapper for the TreeTagger, a POS tagger
|
70
|
+
based on decision trees and developed by Helmut Schmid at IMS in Stuttgart, Germany.
|
71
|
+
You should have the TreeTagger with all library files installed on your machine
|
72
|
+
in order to use this wrapper.
|
79
73
|
email: a.belenkow@uni-trier.de
|
80
|
-
executables:
|
74
|
+
executables:
|
81
75
|
- rtt
|
82
76
|
extensions: []
|
83
|
-
|
84
|
-
extra_rdoc_files:
|
77
|
+
extra_rdoc_files:
|
85
78
|
- README.rdoc
|
86
79
|
- LICENCE.rdoc
|
87
80
|
- CHANGELOG.rdoc
|
88
|
-
files:
|
81
|
+
files:
|
82
|
+
- ".yardopts"
|
83
|
+
- CHANGELOG.rdoc
|
84
|
+
- LICENCE.rdoc
|
85
|
+
- README.rdoc
|
86
|
+
- bin/rtt
|
87
|
+
- lib/tree_tagger.rb
|
88
|
+
- lib/tree_tagger/argv_parser.rb
|
89
89
|
- lib/tree_tagger/chunker.rb
|
90
90
|
- lib/tree_tagger/error.rb
|
91
|
-
- lib/tree_tagger/argv_parser.rb
|
92
91
|
- lib/tree_tagger/tagger.rb
|
93
92
|
- lib/tree_tagger/version.rb
|
94
|
-
-
|
95
|
-
- LICENCE.rdoc
|
96
|
-
- CHANGELOG.rdoc
|
97
|
-
- .yardopts
|
93
|
+
- lib/treetagger.rb
|
98
94
|
- test/test_tagger.rb
|
99
95
|
- test/tree-tagger/corrupted_lexicon_file.txt
|
100
|
-
- test/tree-tagger/lexicon_file.txt
|
101
96
|
- test/tree-tagger/corrupted_model_file.par
|
97
|
+
- test/tree-tagger/lexicon_file.txt
|
102
98
|
- test/tree-tagger/model_file.par
|
103
99
|
- test/tree-tagger/tree-tagger
|
104
|
-
- bin/rtt
|
105
100
|
homepage: http://www.uni-trier.de/index.php?id=34451
|
106
101
|
licenses: []
|
107
|
-
|
102
|
+
metadata: {}
|
108
103
|
post_install_message:
|
109
|
-
rdoc_options:
|
110
|
-
- -m
|
104
|
+
rdoc_options:
|
105
|
+
- "-m"
|
111
106
|
- README.rdoc
|
112
|
-
require_paths:
|
107
|
+
require_paths:
|
113
108
|
- lib
|
114
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
115
|
-
|
116
|
-
requirements:
|
109
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
110
|
+
requirements:
|
117
111
|
- - ">="
|
118
|
-
- !ruby/object:Gem::Version
|
119
|
-
hash: 57
|
120
|
-
segments:
|
121
|
-
- 1
|
122
|
-
- 8
|
123
|
-
- 7
|
112
|
+
- !ruby/object:Gem::Version
|
124
113
|
version: 1.8.7
|
125
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
126
|
-
|
127
|
-
requirements:
|
114
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
115
|
+
requirements:
|
128
116
|
- - ">="
|
129
|
-
- !ruby/object:Gem::Version
|
130
|
-
|
131
|
-
segments:
|
132
|
-
- 0
|
133
|
-
version: "0"
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: '0'
|
134
119
|
requirements: []
|
135
|
-
|
136
120
|
rubyforge_project:
|
137
|
-
rubygems_version:
|
121
|
+
rubygems_version: 2.2.2
|
138
122
|
signing_key:
|
139
|
-
specification_version:
|
123
|
+
specification_version: 4
|
140
124
|
summary: A wrapper for the TreeTagger by Helmut Schmid.
|
141
|
-
test_files:
|
125
|
+
test_files:
|
142
126
|
- test/test_tagger.rb
|
143
127
|
- test/tree-tagger/corrupted_lexicon_file.txt
|
144
|
-
- test/tree-tagger/lexicon_file.txt
|
145
128
|
- test/tree-tagger/corrupted_model_file.par
|
129
|
+
- test/tree-tagger/lexicon_file.txt
|
146
130
|
- test/tree-tagger/model_file.par
|
147
131
|
- test/tree-tagger/tree-tagger
|
148
132
|
has_rdoc:
|