opener-tree-tagger 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +150 -0
- data/bin/opener-tree-tagger-daemon +7 -0
- data/bin/opener-tree-tagger-server +11 -0
- data/bin/tree-tagger +7 -0
- data/config.ru +5 -0
- data/core/dutch.map.treetagger.kaf.csv +40 -0
- data/core/english.map.treetagger.kaf.csv +36 -0
- data/core/french.map.treetagger.kaf.csv +33 -0
- data/core/german.map.treetagger.kaf.csv +52 -0
- data/core/italian.map.treetagger.kaf.csv +38 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
- data/core/spanish.map.treetagger.kaf.csv +75 -0
- data/core/token_matcher.py +82 -0
- data/core/tt_from_kaf_to_kaf.py +215 -0
- data/exec/tree-tagger.rb +9 -0
- data/ext/hack/Rakefile +13 -0
- data/ext/hack/support.rb +38 -0
- data/lib/opener/tree_tagger.rb +69 -0
- data/lib/opener/tree_tagger/cli.rb +69 -0
- data/lib/opener/tree_tagger/public/markdown.css +284 -0
- data/lib/opener/tree_tagger/server.rb +16 -0
- data/lib/opener/tree_tagger/version.rb +5 -0
- data/lib/opener/tree_tagger/views/index.erb +96 -0
- data/lib/opener/tree_tagger/views/result.erb +15 -0
- data/opener-tree-tagger.gemspec +35 -0
- data/pre_build_requirements.txt +1 -0
- metadata +197 -0
@@ -0,0 +1,69 @@
|
|
1
|
+
module Opener
  class TreeTagger
    ##
    # Command line interface of the tree tagger. Parses the command line
    # arguments, runs the tagger on the given input and prints the result.
    #
    # @!attribute [r] options
    #  @return [Hash]
    # @!attribute [r] option_parser
    #  @return [OptionParser]
    #
    class CLI
      attr_reader :options, :option_parser

      ##
      # Merges the given options into DEFAULT_OPTIONS (a constant that is not
      # defined in this file; presumably it lives on TreeTagger) and builds
      # the option parser.
      #
      # @param [Hash] options
      #
      def initialize(options = {})
        @options = DEFAULT_OPTIONS.merge(options)

        @option_parser = OptionParser.new do |opts|
          opts.program_name   = 'tree-tagger'
          opts.summary_indent = ' '

          # Without these two explicit switches OptionParser falls back to its
          # built-in handlers: show_help/show_version would never be called
          # and "--version" would fail with "version unknown".
          opts.on('-h', '--help', 'Shows this help message') do
            show_help
          end

          opts.on('-v', '--version', 'Shows the current version') do
            show_version
          end

          opts.on('-l', '--log', 'Enable logging to STDERR') do
            @options[:logging] = true
          end

          opts.separator <<-EOF

Examples:

cat example.kaf | #{opts.program_name} # Basic usage
cat example.kaf | #{opts.program_name} -l # Logs information to STDERR
          EOF
        end
      end

      ##
      # Parses the arguments stored in `options[:args]`, runs the tagger and
      # writes its output to STDOUT. When the tagger process fails the program
      # is aborted with the captured STDERR output as the message.
      #
      # @param [String] input
      #
      def run(input)
        # parse! consumes the recognised switches from the array in place.
        option_parser.parse!(options[:args])

        tagger = TreeTagger.new(options)

        stdout, stderr, process = tagger.run(input)

        if process.success?
          puts stdout

          # The tagger's STDERR output is only shown when logging is enabled.
          STDERR.puts(stderr) if options[:logging] && !stderr.empty?
        else
          abort stderr
        end
      end

      private

      ##
      # Shows the help message and exits the program.
      #
      def show_help
        abort option_parser.to_s
      end

      ##
      # Shows the version and exits the program.
      #
      def show_version
        abort "#{option_parser.program_name} v#{VERSION} on #{RUBY_DESCRIPTION}"
      end
    end
  end
end
|
69
|
+
|
@@ -0,0 +1,284 @@
|
|
1
|
+
|
2
|
+
input[type="text"], textarea
|
3
|
+
{
|
4
|
+
width: 500px;
|
5
|
+
}
|
6
|
+
|
7
|
+
body {
|
8
|
+
font-family: Helvetica, arial, sans-serif;
|
9
|
+
font-size: 14px;
|
10
|
+
line-height: 1.6;
|
11
|
+
padding-top: 10px;
|
12
|
+
padding-bottom: 10px;
|
13
|
+
background-color: white;
|
14
|
+
padding: 30px; }
|
15
|
+
|
16
|
+
body > *:first-child {
|
17
|
+
margin-top: 0 !important; }
|
18
|
+
body > *:last-child {
|
19
|
+
margin-bottom: 0 !important; }
|
20
|
+
|
21
|
+
a {
|
22
|
+
color: #4183C4; }
|
23
|
+
a.absent {
|
24
|
+
color: #cc0000; }
|
25
|
+
a.anchor {
|
26
|
+
display: block;
|
27
|
+
padding-left: 30px;
|
28
|
+
margin-left: -30px;
|
29
|
+
cursor: pointer;
|
30
|
+
position: absolute;
|
31
|
+
top: 0;
|
32
|
+
left: 0;
|
33
|
+
bottom: 0; }
|
34
|
+
|
35
|
+
h1, h2, h3, h4, h5, h6 {
|
36
|
+
margin: 20px 0 10px;
|
37
|
+
padding: 0;
|
38
|
+
font-weight: bold;
|
39
|
+
-webkit-font-smoothing: antialiased;
|
40
|
+
cursor: text;
|
41
|
+
position: relative; }
|
42
|
+
|
43
|
+
h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor, h5:hover a.anchor, h6:hover a.anchor {
|
44
|
+
background: url("../../images/modules/styleguide/para.png") no-repeat 10px center;
|
45
|
+
text-decoration: none; }
|
46
|
+
|
47
|
+
h1 tt, h1 code {
|
48
|
+
font-size: inherit; }
|
49
|
+
|
50
|
+
h2 tt, h2 code {
|
51
|
+
font-size: inherit; }
|
52
|
+
|
53
|
+
h3 tt, h3 code {
|
54
|
+
font-size: inherit; }
|
55
|
+
|
56
|
+
h4 tt, h4 code {
|
57
|
+
font-size: inherit; }
|
58
|
+
|
59
|
+
h5 tt, h5 code {
|
60
|
+
font-size: inherit; }
|
61
|
+
|
62
|
+
h6 tt, h6 code {
|
63
|
+
font-size: inherit; }
|
64
|
+
|
65
|
+
h1 {
|
66
|
+
font-size: 28px;
|
67
|
+
color: black; }
|
68
|
+
|
69
|
+
h2 {
|
70
|
+
font-size: 24px;
|
71
|
+
border-bottom: 1px solid #cccccc;
|
72
|
+
color: black; }
|
73
|
+
|
74
|
+
h3 {
|
75
|
+
font-size: 18px; }
|
76
|
+
|
77
|
+
h4 {
|
78
|
+
font-size: 16px; }
|
79
|
+
|
80
|
+
h5 {
|
81
|
+
font-size: 14px; }
|
82
|
+
|
83
|
+
h6 {
|
84
|
+
color: #777777;
|
85
|
+
font-size: 14px; }
|
86
|
+
|
87
|
+
p, blockquote, ul, ol, dl, li, table, pre {
|
88
|
+
margin: 15px 0; }
|
89
|
+
|
90
|
+
hr {
|
91
|
+
background: transparent url("../../images/modules/pulls/dirty-shade.png") repeat-x 0 0;
|
92
|
+
border: 0 none;
|
93
|
+
color: #cccccc;
|
94
|
+
height: 4px;
|
95
|
+
padding: 0; }
|
96
|
+
|
97
|
+
body > h2:first-child {
|
98
|
+
margin-top: 0;
|
99
|
+
padding-top: 0; }
|
100
|
+
body > h1:first-child {
|
101
|
+
margin-top: 0;
|
102
|
+
padding-top: 0; }
|
103
|
+
body > h1:first-child + h2 {
|
104
|
+
margin-top: 0;
|
105
|
+
padding-top: 0; }
|
106
|
+
body > h3:first-child, body > h4:first-child, body > h5:first-child, body > h6:first-child {
|
107
|
+
margin-top: 0;
|
108
|
+
padding-top: 0; }
|
109
|
+
|
110
|
+
a:first-child h1, a:first-child h2, a:first-child h3, a:first-child h4, a:first-child h5, a:first-child h6 {
|
111
|
+
margin-top: 0;
|
112
|
+
padding-top: 0; }
|
113
|
+
|
114
|
+
h1 p, h2 p, h3 p, h4 p, h5 p, h6 p {
|
115
|
+
margin-top: 0; }
|
116
|
+
|
117
|
+
li p.first {
|
118
|
+
display: inline-block; }
|
119
|
+
|
120
|
+
ul, ol {
|
121
|
+
padding-left: 30px; }
|
122
|
+
|
123
|
+
ul :first-child, ol :first-child {
|
124
|
+
margin-top: 0; }
|
125
|
+
|
126
|
+
ul :last-child, ol :last-child {
|
127
|
+
margin-bottom: 0; }
|
128
|
+
|
129
|
+
dl {
|
130
|
+
padding: 0; }
|
131
|
+
dl dt {
|
132
|
+
font-size: 14px;
|
133
|
+
font-weight: bold;
|
134
|
+
font-style: italic;
|
135
|
+
padding: 0;
|
136
|
+
margin: 15px 0 5px; }
|
137
|
+
dl dt:first-child {
|
138
|
+
padding: 0; }
|
139
|
+
dl dt > :first-child {
|
140
|
+
margin-top: 0; }
|
141
|
+
dl dt > :last-child {
|
142
|
+
margin-bottom: 0; }
|
143
|
+
dl dd {
|
144
|
+
margin: 0 0 15px;
|
145
|
+
padding: 0 15px; }
|
146
|
+
dl dd > :first-child {
|
147
|
+
margin-top: 0; }
|
148
|
+
dl dd > :last-child {
|
149
|
+
margin-bottom: 0; }
|
150
|
+
|
151
|
+
blockquote {
|
152
|
+
border-left: 4px solid #dddddd;
|
153
|
+
padding: 0 15px;
|
154
|
+
color: #777777; }
|
155
|
+
blockquote > :first-child {
|
156
|
+
margin-top: 0; }
|
157
|
+
blockquote > :last-child {
|
158
|
+
margin-bottom: 0; }
|
159
|
+
|
160
|
+
table {
|
161
|
+
padding: 0; }
|
162
|
+
table tr {
|
163
|
+
border-top: 1px solid #cccccc;
|
164
|
+
background-color: white;
|
165
|
+
margin: 0;
|
166
|
+
padding: 0; }
|
167
|
+
table tr:nth-child(2n) {
|
168
|
+
background-color: #f8f8f8; }
|
169
|
+
table tr th {
|
170
|
+
font-weight: bold;
|
171
|
+
border: 1px solid #cccccc;
|
172
|
+
text-align: left;
|
173
|
+
margin: 0;
|
174
|
+
padding: 6px 13px; }
|
175
|
+
table tr td {
|
176
|
+
border: 1px solid #cccccc;
|
177
|
+
text-align: left;
|
178
|
+
margin: 0;
|
179
|
+
padding: 6px 13px; }
|
180
|
+
table tr th :first-child, table tr td :first-child {
|
181
|
+
margin-top: 0; }
|
182
|
+
table tr th :last-child, table tr td :last-child {
|
183
|
+
margin-bottom: 0; }
|
184
|
+
|
185
|
+
img {
|
186
|
+
max-width: 100%; }
|
187
|
+
|
188
|
+
span.frame {
|
189
|
+
display: block;
|
190
|
+
overflow: hidden; }
|
191
|
+
span.frame > span {
|
192
|
+
border: 1px solid #dddddd;
|
193
|
+
display: block;
|
194
|
+
float: left;
|
195
|
+
overflow: hidden;
|
196
|
+
margin: 13px 0 0;
|
197
|
+
padding: 7px;
|
198
|
+
width: auto; }
|
199
|
+
span.frame span img {
|
200
|
+
display: block;
|
201
|
+
float: left; }
|
202
|
+
span.frame span span {
|
203
|
+
clear: both;
|
204
|
+
color: #333333;
|
205
|
+
display: block;
|
206
|
+
padding: 5px 0 0; }
|
207
|
+
span.align-center {
|
208
|
+
display: block;
|
209
|
+
overflow: hidden;
|
210
|
+
clear: both; }
|
211
|
+
span.align-center > span {
|
212
|
+
display: block;
|
213
|
+
overflow: hidden;
|
214
|
+
margin: 13px auto 0;
|
215
|
+
text-align: center; }
|
216
|
+
span.align-center span img {
|
217
|
+
margin: 0 auto;
|
218
|
+
text-align: center; }
|
219
|
+
span.align-right {
|
220
|
+
display: block;
|
221
|
+
overflow: hidden;
|
222
|
+
clear: both; }
|
223
|
+
span.align-right > span {
|
224
|
+
display: block;
|
225
|
+
overflow: hidden;
|
226
|
+
margin: 13px 0 0;
|
227
|
+
text-align: right; }
|
228
|
+
span.align-right span img {
|
229
|
+
margin: 0;
|
230
|
+
text-align: right; }
|
231
|
+
span.float-left {
|
232
|
+
display: block;
|
233
|
+
margin-right: 13px;
|
234
|
+
overflow: hidden;
|
235
|
+
float: left; }
|
236
|
+
span.float-left span {
|
237
|
+
margin: 13px 0 0; }
|
238
|
+
span.float-right {
|
239
|
+
display: block;
|
240
|
+
margin-left: 13px;
|
241
|
+
overflow: hidden;
|
242
|
+
float: right; }
|
243
|
+
span.float-right > span {
|
244
|
+
display: block;
|
245
|
+
overflow: hidden;
|
246
|
+
margin: 13px auto 0;
|
247
|
+
text-align: right; }
|
248
|
+
|
249
|
+
code, tt {
|
250
|
+
margin: 0 2px;
|
251
|
+
padding: 0 5px;
|
252
|
+
white-space: nowrap;
|
253
|
+
border: 1px solid #eaeaea;
|
254
|
+
background-color: #f8f8f8;
|
255
|
+
border-radius: 3px; }
|
256
|
+
|
257
|
+
pre code {
|
258
|
+
margin: 0;
|
259
|
+
padding: 0;
|
260
|
+
white-space: pre;
|
261
|
+
border: none;
|
262
|
+
background: transparent; }
|
263
|
+
|
264
|
+
.highlight pre {
|
265
|
+
background-color: #f8f8f8;
|
266
|
+
border: 1px solid #cccccc;
|
267
|
+
font-size: 13px;
|
268
|
+
line-height: 19px;
|
269
|
+
overflow: auto;
|
270
|
+
padding: 6px 10px;
|
271
|
+
border-radius: 3px; }
|
272
|
+
|
273
|
+
pre {
|
274
|
+
background-color: #f8f8f8;
|
275
|
+
border: 1px solid #cccccc;
|
276
|
+
font-size: 13px;
|
277
|
+
line-height: 19px;
|
278
|
+
overflow: auto;
|
279
|
+
padding: 6px 10px;
|
280
|
+
border-radius: 3px; }
|
281
|
+
pre code, pre tt {
|
282
|
+
background-color: transparent;
|
283
|
+
border: none; }
|
284
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'sinatra/base'
|
2
|
+
require 'httpclient'
|
3
|
+
require 'opener/webservice'
|
4
|
+
|
5
|
+
module Opener
  class TreeTagger
    ##
    # Tree tagger web service, built on top of the Webservice base class
    # (loaded from 'opener/webservice').
    #
    class Server < Webservice
      # Templates are looked up in the "views" directory next to this file.
      set :views, File.expand_path('views', File.dirname(__FILE__))

      # Requests are processed by TreeTagger; only the "input" parameter is
      # registered as accepted.
      text_processor TreeTagger
      accepted_params :input
    end # Server
  end # TreeTagger
end # Opener
|
@@ -0,0 +1,96 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link type="text/css" rel="stylesheet" charset="UTF-8" href="markdown.css"/>
|
5
|
+
<title>Tree Tagger Web Service</title>
|
6
|
+
</head>
|
7
|
+
<body>
|
8
|
+
<h1>Tree Tagger Web Service</h1>
|
9
|
+
|
10
|
+
<h2>Example Usage</h2>
|
11
|
+
|
12
|
+
<p>
|
13
|
+
<pre>opener-tree-tagger-server start</pre>
|
14
|
+
</p>
|
15
|
+
|
16
|
+
<h2>Try the webservice</h2>
|
17
|
+
|
18
|
+
<p>* required</p>
|
19
|
+
<p>** When a value is entered, no response will be displayed in the browser.</p>
|
20
|
+
|
21
|
+
<form action="<%=url("/")%>" method="POST">
|
22
|
+
<div>
|
23
|
+
<label for="text">Type your text here*</label>
|
24
|
+
<br/>
|
25
|
+
|
26
|
+
<textarea name="input" id="text" rows="10" cols="50"></textarea>
|
27
|
+
</div>
|
28
|
+
|
29
|
+
<% 10.times do |t| %>
|
30
|
+
<div>
|
31
|
+
<label for="callbacks_<%=t+1%>">Callback URL <%=t+1%>(**)</label>
|
32
|
+
<br />
|
33
|
+
|
34
|
+
<input id="callbacks_<%=t+1%>" type="text" name="callbacks[]" />
|
35
|
+
</div>
|
36
|
+
<% end %>
|
37
|
+
|
38
|
+
|
39
|
+
<div>
|
40
|
+
<label for="error_callback">Error Callback</label>
|
41
|
+
<br />
|
42
|
+
|
43
|
+
<input id="error_callback" type="text" name="error_callback" />
|
44
|
+
</div>
|
45
|
+
<input type="submit" value="Submit" />
|
46
|
+
</form>
|
47
|
+
|
48
|
+
<h2>Actions</h2>
|
49
|
+
|
50
|
+
<p>
|
51
|
+
<dl>
|
52
|
+
<dt>POST /</dt>
|
53
|
+
<dd>Tag the input tokenized text. See arguments listing for more options.</dd>
|
54
|
+
<dt>GET /</dt>
|
55
|
+
<dd>Show this page</dd>
|
56
|
+
</dl>
|
57
|
+
</p>
|
58
|
+
|
59
|
+
<h2>Arguments</h2>
|
60
|
+
|
61
|
+
<p> The webservice takes the following arguments: </p>
|
62
|
+
<p>* required</p>
|
63
|
+
|
64
|
+
<dl>
|
65
|
+
<dt>input*</dt>
|
66
|
+
<dd>The input text in KAF format. Sample KAF input:</dd>
|
67
|
+
<pre></pre>
|
68
|
+
|
69
|
+
<dt>callbacks</dt>
|
70
|
+
<dd>
|
71
|
+
You can provide a list of callback urls. If you provide callback urls
|
72
|
+
the POS tagger will run as a background job and a callback
|
73
|
+
with the results will be performed (POST) to the first url in the callback
|
74
|
+
list. The other urls in callback list will be provided in the "callbacks"
|
75
|
+
argument.<br/><br/>
|
76
|
+
Using callback you can chain together several OpeNER webservices in
|
77
|
+
one call. The first will call the second, which will call the third, etc.
|
78
|
+
For more information, see the <a href="http://opener-project.github.io">
|
79
|
+
webservice documentation online</a>.
|
80
|
+
</dd>
|
81
|
+
<dt>error_callback</dt>
|
82
|
+
<dd>URL to notify if errors occur in the background process. The error
|
83
|
+
callback will do a POST with the error message in the 'error' field.</dd>
|
84
|
+
</dt>
|
85
|
+
|
86
|
+
|
87
|
+
|
88
|
+
</dl>
|
89
|
+
|
90
|
+
|
91
|
+
<p>
|
92
|
+
|
93
|
+
</p>
|
94
|
+
|
95
|
+
</body>
|
96
|
+
</html>
|