opener-pos-tagger 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +164 -0
- data/bin/pos-tagger +7 -0
- data/bin/pos-tagger-server +10 -0
- data/config.ru +4 -0
- data/lib/opener/pos_tagger.rb +90 -0
- data/lib/opener/pos_tagger/cli.rb +73 -0
- data/lib/opener/pos_tagger/public/markdown.css +283 -0
- data/lib/opener/pos_tagger/server.rb +16 -0
- data/lib/opener/pos_tagger/version.rb +5 -0
- data/lib/opener/pos_tagger/views/index.erb +163 -0
- data/lib/opener/pos_tagger/views/result.erb +15 -0
- data/opener-pos-tagger.gemspec +35 -0
- metadata +197 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: ccc6c90ace4f3e79af9d820dd7b773ccffdd65fe
|
4
|
+
data.tar.gz: 3af96ef7ef65f6210ff076f3c8dc605e5394ec0f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 70f3208c340084a7fa7c5a7f1701c3aa4bdd39f7bbe43b47a652c6da09179eedf7d3a5ef1f5680f827c953e6063935af11ca343d10a38e7552f47b95a08773a6
|
7
|
+
data.tar.gz: a72e6d5d3cdd505179e9bac632ba9bd9cf125a34a2b579942c8a98b7d62f7ebb34cec01e50f90d4fd22a9257f6e44ccbe22798e35730f95fe381af0b15d2801a
|
data/README.md
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
POS-tagger
|
2
|
+
------------
|
3
|
+
|
4
|
+
Component that wraps the different existing POS Taggers based on OpenNLP.
|
5
|
+
|
6
|
+
### Confused by some terminology?
|
7
|
+
|
8
|
+
This software is part of a larger collection of natural language processing
|
9
|
+
tools known as "the OpeNER project". You can find more information about the
|
10
|
+
project at [the OpeNER portal](http://opener-project.github.io). There you can
|
11
|
+
also find references to terms like KAF (an XML standard to represent linguistic
|
12
|
+
annotations in texts), component, cores, scenarios and pipelines.
|
13
|
+
|
14
|
+
Quick Use Example
|
15
|
+
-----------------
|
16
|
+
|
17
|
+
Installing the pos-tagger can be done by executing:
|
18
|
+
|
19
|
+
gem install opener-pos-tagger
|
20
|
+
|
21
|
+
Please bear in mind that all components in OpeNER take KAF as an input and
|
22
|
+
output KAF by default.
|
23
|
+
|
24
|
+
### Command line interface
|
25
|
+
|
26
|
+
You should now be able to call the POS tagger as a regular shell
|
27
|
+
command: by its name. Once installed, the gem normally sits in your path so you can call it directly from anywhere.
|
28
|
+
|
29
|
+
This application reads a KAF document from standard input and adds part-of-speech tags to it.
|
30
|
+
|
31
|
+
POS tagging some text (assuming that the tokenized KAF input is in a file called *english.kaf*):
|
32
|
+
|
33
|
+
cat english.kaf | pos-tagger
|
34
|
+
|
35
|
+
Will result in
|
36
|
+
|
37
|
+
<?xml version='1.0' encoding='UTF-8'?>
|
38
|
+
<KAF version="v1.opener" xml:lang="en">
|
39
|
+
<kafHeader>
|
40
|
+
<linguisticProcessors layer="text">
|
41
|
+
<lp name="opennlp-en-tok" timestamp="2013-06-11T13:41:37Z" version="1.0"/>
|
42
|
+
<lp name="opennlp-en-sent" timestamp="2013-06-11T13:41:37Z" version="1.0"/>
|
43
|
+
</linguisticProcessors>
|
44
|
+
<linguisticProcessor layer="term">
|
45
|
+
<lp timestamp="2013-06-12T15:18:03CEST" version="1.0" name="Open nlp pos tagger"/>
|
46
|
+
</linguisticProcessor>
|
47
|
+
</kafHeader>
|
48
|
+
<text>
|
49
|
+
<wf length="4" offset="0" para="1" sent="1" wid="w1">this</wf>
|
50
|
+
<wf length="2" offset="5" para="1" sent="1" wid="w2">is</wf>
|
51
|
+
<wf length="2" offset="8" para="1" sent="1" wid="w3">an</wf>
|
52
|
+
<wf length="7" offset="11" para="1" sent="1" wid="w4">english</wf>
|
53
|
+
<wf length="4" offset="19" para="1" sent="1" wid="w5">text</wf>
|
54
|
+
</text>
|
55
|
+
<terms>
|
56
|
+
<term lemma="this" morphofeat="FM" pos="O" tid="t_1" type="open">
|
57
|
+
<span>
|
58
|
+
<target id="w1"/>
|
59
|
+
</span>
|
60
|
+
</term>
|
61
|
+
<term lemma="is" morphofeat="FM" pos="O" tid="t_2" type="open">
|
62
|
+
<span>
|
63
|
+
<target id="w2"/>
|
64
|
+
</span>
|
65
|
+
</term>
|
66
|
+
<term lemma="an" morphofeat="APPR" pos="P" tid="t_3" type="close">
|
67
|
+
<span>
|
68
|
+
<target id="w3"/>
|
69
|
+
</span>
|
70
|
+
</term>
|
71
|
+
<term lemma="english" morphofeat="FM" pos="O" tid="t_4" type="open">
|
72
|
+
<span>
|
73
|
+
<target id="w4"/>
|
74
|
+
</span>
|
75
|
+
</term>
|
76
|
+
<term lemma="text" morphofeat="FM" pos="O" tid="t_5" type="open">
|
77
|
+
<span>
|
78
|
+
<target id="w5"/>
|
79
|
+
</span>
|
80
|
+
</term>
|
81
|
+
</terms>
|
82
|
+
</KAF>
|
83
|
+
|
84
|
+
### Webservices
|
85
|
+
|
86
|
+
You can launch a POS tagging webservice by executing:
|
87
|
+
|
88
|
+
pos-tagger-server
|
89
|
+
|
90
|
+
This will launch a mini webserver with the webservice. It defaults to port 9292,
|
91
|
+
so you can access it at <http://localhost:9292>.
|
92
|
+
|
93
|
+
To launch it on a different port provide the `-p [port-number]` option like
|
94
|
+
this:
|
95
|
+
|
96
|
+
pos-tagger-server -p 1234
|
97
|
+
|
98
|
+
It then launches at <http://localhost:1234>
|
99
|
+
|
100
|
+
Documentation on the Webservice is provided by surfing to the urls provided
|
101
|
+
above. For more information on how to launch a webservice run the command with
|
102
|
+
the ```-h``` option.
|
103
|
+
|
104
|
+
|
105
|
+
### Daemon
|
106
|
+
|
107
|
+
Last but not least the POS tagger comes shipped with a daemon that
|
108
|
+
can read jobs from (and write jobs to) Amazon SQS queues. For more
|
109
|
+
information type:
|
110
|
+
|
111
|
+
pos-tagger-daemon -h
|
112
|
+
|
113
|
+
Description of dependencies
|
114
|
+
---------------------------
|
115
|
+
|
116
|
+
This component runs best if you run it in an environment suited for OpeNER
|
117
|
+
components. You can find an installation guide and helper tools in the [OpeNER
|
118
|
+
installer](https://github.com/opener-project/opener-installer) and [an
|
119
|
+
installation guide on the OpeNER
|
120
|
+
website](http://opener-project.github.io/getting-started/how-to/local-installation.html).
|
121
|
+
|
122
|
+
At least you need the following system setup:
|
123
|
+
|
124
|
+
### Dependencies for normal use:
|
125
|
+
|
126
|
+
* JRuby (1.7.9+)
|
127
|
+
* Java 1.7 or newer (There are problems with encoding in older versions).
|
128
|
+
|
129
|
+
### Dependencies if you want to modify the component:
|
130
|
+
|
131
|
+
* Maven (for building the Gem)
|
132
|
+
|
133
|
+
Language Extension
|
134
|
+
------------------
|
135
|
+
|
136
|
+
TODO
|
137
|
+
|
138
|
+
The Core
|
139
|
+
--------
|
140
|
+
|
141
|
+
The component is a fat wrapper around the actual language technology core. You
|
142
|
+
can find the core technologies in the following repositories: [https://github.com/opener-project/?query=pos](https://github.com/opener-project/?query=pos)
|
143
|
+
|
144
|
+
Where to go from here
|
145
|
+
---------------------
|
146
|
+
|
147
|
+
* Check [the project website](http://opener-project.github.io)
|
148
|
+
* [Check out the webservice](http://opener.olery.com/pos-tagger)
|
149
|
+
|
150
|
+
Report problem/Get help
|
151
|
+
-----------------------
|
152
|
+
|
153
|
+
If you encounter problems, please email support@opener-project.eu or leave an
|
154
|
+
issue in the [issue tracker](https://github.com/opener-project/pos-tagger/issues).
|
155
|
+
|
156
|
+
|
157
|
+
Contributing
|
158
|
+
------------
|
159
|
+
|
160
|
+
1. Fork it ( http://github.com/opener-project/pos-tagger/fork )
|
161
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
162
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
163
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
164
|
+
5. Create new Pull Request
|
data/bin/pos-tagger
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/opener/pos_tagger/server'
|
4
|
+
|
5
|
+
# The CLI arguments are never passed unless `Rack::Server#options` is called
# manually, so the application cannot be given as a constructor argument.
# The server is therefore created bare and the rackup file is assigned through
# its options afterwards.
rackup_file = File.expand_path('../../config.ru', __FILE__)

server = Rack::Server.new
server.options[:config] = rackup_file

server.start
|
data/config.ru
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'opener/pos_taggers/base'
|
2
|
+
require 'opener/pos_taggers/en'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'open3'
|
5
|
+
require 'optparse'
|
6
|
+
|
7
|
+
require_relative 'pos_tagger/version'
|
8
|
+
require_relative 'pos_tagger/cli'
|
9
|
+
|
10
|
+
module Opener
  ##
  # Primary POS tagger class that delegates work to the various POS tagging
  # kernels.
  #
  # @!attribute [r] options
  #  @return [Hash]
  #
  class POSTagger
    attr_reader :options

    ##
    # Hash containing the default options to use.
    #
    # @return [Hash]
    #
    DEFAULT_OPTIONS = {
      :args => []
    }.freeze

    ##
    # Shape an upcased language code must have to be usable as a constant
    # name. Anything else (empty string, "EN-GB", ...) can never name a
    # kernel and would make `const_defined?` raise a NameError.
    #
    # @return [Regexp]
    #
    KERNEL_NAME_PATTERN = /\A[A-Z][A-Z0-9_]*\z/

    ##
    # @param [Hash] options
    #
    # @option options [Array] :args Arbitrary arguments to pass to the
    #  underlying kernel.
    #
    def initialize(options = {})
      @options = DEFAULT_OPTIONS.merge(options)
    end

    ##
    # Looks up the kernel matching the language of the KAF document, runs it
    # on the input and returns whatever the kernel's `run` method returns.
    #
    # @param [String] input The input to process.
    # @return [Object] The result of the kernel's `run` method.
    # @raise [ArgumentError] When the document has no language or when no
    #  kernel exists for it.
    #
    def run(input)
      language = language_from_kaf(input)

      unless valid_language?(language)
        raise ArgumentError, "The specified language (#{language}) is invalid"
      end

      kernel = language_constant(language).new(:args => options[:args])

      return kernel.run(input)
    end

    alias tag run

    protected

    ##
    # Extracts the language from a KAF document by reading the `xml:lang`
    # value of the first node.
    #
    # @param [String] input
    # @return [String, nil] nil when the first node carries no language or
    #  when the reader yields no node at all.
    #
    def language_from_kaf(input)
      node = Nokogiri::XML::Reader(input).read

      # Guard against a reader that returns nil instead of a node.
      return node && node.lang
    end

    ##
    # @param [String] language
    # @return [Class, nil] nil when no language is given.
    #
    def language_constant(language)
      # `false` restricts the lookup to Opener::POSTaggers itself so that
      # top-level constants are never returned as kernels.
      return language && POSTaggers.const_get(language.to_s.upcase, false)
    end

    ##
    # Checks whether a kernel is defined for the given language. Never
    # raises: a missing language or one that cannot be a constant name is
    # simply reported as invalid.
    #
    # @param [String, nil] language
    # @return [TrueClass|FalseClass]
    #
    def valid_language?(language)
      name = language.to_s.upcase

      return false unless name =~ KERNEL_NAME_PATTERN

      # Without `false` a code such as "io" would match the top-level IO
      # class because modules also search Object when inheriting.
      return Opener::POSTaggers.const_defined?(name, false)
    end
  end # POSTagger
end # Opener
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
|
3
|
+
module Opener
  class POSTagger
    ##
    # CLI wrapper around {Opener::POSTagger} using OptionParser.
    #
    # @!attribute [r] options
    #  @return [Hash]
    # @!attribute [r] option_parser
    #  @return [OptionParser]
    #
    class CLI
      attr_reader :options, :option_parser

      ##
      # Merges the given options into the defaults and builds the option
      # parser. Nothing is parsed until {#run} is called.
      #
      # @param [Hash] options
      #
      def initialize(options = {})
        @options = DEFAULT_OPTIONS.merge(options)

        @option_parser = ::OptionParser.new do |opts|
          opts.program_name   = 'pos-tagger'
          opts.summary_indent = ' '

          opts.on('-h', '--help', 'Shows this help message') do
            show_help
          end

          opts.on('-v', '--version', 'Shows the current version') do
            show_version
          end

          opts.separator <<-EOF

Examples:

cat example.kaf | #{opts.program_name}
          EOF
        end
      end

      ##
      # Parses the command line arguments (removing recognised switches from
      # `options[:args]`), tags the input and writes the result to STDOUT.
      #
      # @param [String] input
      #
      def run(input)
        option_parser.parse!(options[:args])

        tagger = POSTagger.new(options)
        stdout = tagger.run(input)

        puts stdout
      end

      private

      ##
      # Shows the help message and exits the program.
      #
      # Asking for help is not an error: the text goes to STDOUT and the
      # process exits with status 0 (`abort` would use STDERR and status 1).
      #
      def show_help
        puts option_parser.to_s

        exit
      end

      ##
      # Shows the version and exits the program with status 0.
      #
      def show_version
        puts "#{option_parser.program_name} v#{VERSION} on #{RUBY_DESCRIPTION}"

        exit
      end
    end # CLI
  end # POSTagger
end # Opener
|
@@ -0,0 +1,283 @@
|
|
1
|
+
input[type="text"], textarea
|
2
|
+
{
|
3
|
+
width: 500px;
|
4
|
+
}
|
5
|
+
|
6
|
+
body {
|
7
|
+
font-family: Helvetica, arial, sans-serif;
|
8
|
+
font-size: 14px;
|
9
|
+
line-height: 1.6;
|
10
|
+
padding-top: 10px;
|
11
|
+
padding-bottom: 10px;
|
12
|
+
background-color: white;
|
13
|
+
padding: 30px; }
|
14
|
+
|
15
|
+
body > *:first-child {
|
16
|
+
margin-top: 0 !important; }
|
17
|
+
body > *:last-child {
|
18
|
+
margin-bottom: 0 !important; }
|
19
|
+
|
20
|
+
a {
|
21
|
+
color: #4183C4; }
|
22
|
+
a.absent {
|
23
|
+
color: #cc0000; }
|
24
|
+
a.anchor {
|
25
|
+
display: block;
|
26
|
+
padding-left: 30px;
|
27
|
+
margin-left: -30px;
|
28
|
+
cursor: pointer;
|
29
|
+
position: absolute;
|
30
|
+
top: 0;
|
31
|
+
left: 0;
|
32
|
+
bottom: 0; }
|
33
|
+
|
34
|
+
h1, h2, h3, h4, h5, h6 {
|
35
|
+
margin: 20px 0 10px;
|
36
|
+
padding: 0;
|
37
|
+
font-weight: bold;
|
38
|
+
-webkit-font-smoothing: antialiased;
|
39
|
+
cursor: text;
|
40
|
+
position: relative; }
|
41
|
+
|
42
|
+
h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor, h5:hover a.anchor, h6:hover a.anchor {
|
43
|
+
background: url("../../images/modules/styleguide/para.png") no-repeat 10px center;
|
44
|
+
text-decoration: none; }
|
45
|
+
|
46
|
+
h1 tt, h1 code {
|
47
|
+
font-size: inherit; }
|
48
|
+
|
49
|
+
h2 tt, h2 code {
|
50
|
+
font-size: inherit; }
|
51
|
+
|
52
|
+
h3 tt, h3 code {
|
53
|
+
font-size: inherit; }
|
54
|
+
|
55
|
+
h4 tt, h4 code {
|
56
|
+
font-size: inherit; }
|
57
|
+
|
58
|
+
h5 tt, h5 code {
|
59
|
+
font-size: inherit; }
|
60
|
+
|
61
|
+
h6 tt, h6 code {
|
62
|
+
font-size: inherit; }
|
63
|
+
|
64
|
+
h1 {
|
65
|
+
font-size: 28px;
|
66
|
+
color: black; }
|
67
|
+
|
68
|
+
h2 {
|
69
|
+
font-size: 24px;
|
70
|
+
border-bottom: 1px solid #cccccc;
|
71
|
+
color: black; }
|
72
|
+
|
73
|
+
h3 {
|
74
|
+
font-size: 18px; }
|
75
|
+
|
76
|
+
h4 {
|
77
|
+
font-size: 16px; }
|
78
|
+
|
79
|
+
h5 {
|
80
|
+
font-size: 14px; }
|
81
|
+
|
82
|
+
h6 {
|
83
|
+
color: #777777;
|
84
|
+
font-size: 14px; }
|
85
|
+
|
86
|
+
p, blockquote, ul, ol, dl, li, table, pre {
|
87
|
+
margin: 15px 0; }
|
88
|
+
|
89
|
+
hr {
|
90
|
+
background: transparent url("../../images/modules/pulls/dirty-shade.png") repeat-x 0 0;
|
91
|
+
border: 0 none;
|
92
|
+
color: #cccccc;
|
93
|
+
height: 4px;
|
94
|
+
padding: 0; }
|
95
|
+
|
96
|
+
body > h2:first-child {
|
97
|
+
margin-top: 0;
|
98
|
+
padding-top: 0; }
|
99
|
+
body > h1:first-child {
|
100
|
+
margin-top: 0;
|
101
|
+
padding-top: 0; }
|
102
|
+
body > h1:first-child + h2 {
|
103
|
+
margin-top: 0;
|
104
|
+
padding-top: 0; }
|
105
|
+
body > h3:first-child, body > h4:first-child, body > h5:first-child, body > h6:first-child {
|
106
|
+
margin-top: 0;
|
107
|
+
padding-top: 0; }
|
108
|
+
|
109
|
+
a:first-child h1, a:first-child h2, a:first-child h3, a:first-child h4, a:first-child h5, a:first-child h6 {
|
110
|
+
margin-top: 0;
|
111
|
+
padding-top: 0; }
|
112
|
+
|
113
|
+
h1 p, h2 p, h3 p, h4 p, h5 p, h6 p {
|
114
|
+
margin-top: 0; }
|
115
|
+
|
116
|
+
li p.first {
|
117
|
+
display: inline-block; }
|
118
|
+
|
119
|
+
ul, ol {
|
120
|
+
padding-left: 30px; }
|
121
|
+
|
122
|
+
ul :first-child, ol :first-child {
|
123
|
+
margin-top: 0; }
|
124
|
+
|
125
|
+
ul :last-child, ol :last-child {
|
126
|
+
margin-bottom: 0; }
|
127
|
+
|
128
|
+
dl {
|
129
|
+
padding: 0; }
|
130
|
+
dl dt {
|
131
|
+
font-size: 14px;
|
132
|
+
font-weight: bold;
|
133
|
+
font-style: italic;
|
134
|
+
padding: 0;
|
135
|
+
margin: 15px 0 5px; }
|
136
|
+
dl dt:first-child {
|
137
|
+
padding: 0; }
|
138
|
+
dl dt > :first-child {
|
139
|
+
margin-top: 0; }
|
140
|
+
dl dt > :last-child {
|
141
|
+
margin-bottom: 0; }
|
142
|
+
dl dd {
|
143
|
+
margin: 0 0 15px;
|
144
|
+
padding: 0 15px; }
|
145
|
+
dl dd > :first-child {
|
146
|
+
margin-top: 0; }
|
147
|
+
dl dd > :last-child {
|
148
|
+
margin-bottom: 0; }
|
149
|
+
|
150
|
+
blockquote {
|
151
|
+
border-left: 4px solid #dddddd;
|
152
|
+
padding: 0 15px;
|
153
|
+
color: #777777; }
|
154
|
+
blockquote > :first-child {
|
155
|
+
margin-top: 0; }
|
156
|
+
blockquote > :last-child {
|
157
|
+
margin-bottom: 0; }
|
158
|
+
|
159
|
+
table {
|
160
|
+
padding: 0; }
|
161
|
+
table tr {
|
162
|
+
border-top: 1px solid #cccccc;
|
163
|
+
background-color: white;
|
164
|
+
margin: 0;
|
165
|
+
padding: 0; }
|
166
|
+
table tr:nth-child(2n) {
|
167
|
+
background-color: #f8f8f8; }
|
168
|
+
table tr th {
|
169
|
+
font-weight: bold;
|
170
|
+
border: 1px solid #cccccc;
|
171
|
+
text-align: left;
|
172
|
+
margin: 0;
|
173
|
+
padding: 6px 13px; }
|
174
|
+
table tr td {
|
175
|
+
border: 1px solid #cccccc;
|
176
|
+
text-align: left;
|
177
|
+
margin: 0;
|
178
|
+
padding: 6px 13px; }
|
179
|
+
table tr th :first-child, table tr td :first-child {
|
180
|
+
margin-top: 0; }
|
181
|
+
table tr th :last-child, table tr td :last-child {
|
182
|
+
margin-bottom: 0; }
|
183
|
+
|
184
|
+
img {
|
185
|
+
max-width: 100%; }
|
186
|
+
|
187
|
+
span.frame {
|
188
|
+
display: block;
|
189
|
+
overflow: hidden; }
|
190
|
+
span.frame > span {
|
191
|
+
border: 1px solid #dddddd;
|
192
|
+
display: block;
|
193
|
+
float: left;
|
194
|
+
overflow: hidden;
|
195
|
+
margin: 13px 0 0;
|
196
|
+
padding: 7px;
|
197
|
+
width: auto; }
|
198
|
+
span.frame span img {
|
199
|
+
display: block;
|
200
|
+
float: left; }
|
201
|
+
span.frame span span {
|
202
|
+
clear: both;
|
203
|
+
color: #333333;
|
204
|
+
display: block;
|
205
|
+
padding: 5px 0 0; }
|
206
|
+
span.align-center {
|
207
|
+
display: block;
|
208
|
+
overflow: hidden;
|
209
|
+
clear: both; }
|
210
|
+
span.align-center > span {
|
211
|
+
display: block;
|
212
|
+
overflow: hidden;
|
213
|
+
margin: 13px auto 0;
|
214
|
+
text-align: center; }
|
215
|
+
span.align-center span img {
|
216
|
+
margin: 0 auto;
|
217
|
+
text-align: center; }
|
218
|
+
span.align-right {
|
219
|
+
display: block;
|
220
|
+
overflow: hidden;
|
221
|
+
clear: both; }
|
222
|
+
span.align-right > span {
|
223
|
+
display: block;
|
224
|
+
overflow: hidden;
|
225
|
+
margin: 13px 0 0;
|
226
|
+
text-align: right; }
|
227
|
+
span.align-right span img {
|
228
|
+
margin: 0;
|
229
|
+
text-align: right; }
|
230
|
+
span.float-left {
|
231
|
+
display: block;
|
232
|
+
margin-right: 13px;
|
233
|
+
overflow: hidden;
|
234
|
+
float: left; }
|
235
|
+
span.float-left span {
|
236
|
+
margin: 13px 0 0; }
|
237
|
+
span.float-right {
|
238
|
+
display: block;
|
239
|
+
margin-left: 13px;
|
240
|
+
overflow: hidden;
|
241
|
+
float: right; }
|
242
|
+
span.float-right > span {
|
243
|
+
display: block;
|
244
|
+
overflow: hidden;
|
245
|
+
margin: 13px auto 0;
|
246
|
+
text-align: right; }
|
247
|
+
|
248
|
+
code, tt {
|
249
|
+
margin: 0 2px;
|
250
|
+
padding: 0 5px;
|
251
|
+
white-space: nowrap;
|
252
|
+
border: 1px solid #eaeaea;
|
253
|
+
background-color: #f8f8f8;
|
254
|
+
border-radius: 3px; }
|
255
|
+
|
256
|
+
pre code {
|
257
|
+
margin: 0;
|
258
|
+
padding: 0;
|
259
|
+
white-space: pre;
|
260
|
+
border: none;
|
261
|
+
background: transparent; }
|
262
|
+
|
263
|
+
.highlight pre {
|
264
|
+
background-color: #f8f8f8;
|
265
|
+
border: 1px solid #cccccc;
|
266
|
+
font-size: 13px;
|
267
|
+
line-height: 19px;
|
268
|
+
overflow: auto;
|
269
|
+
padding: 6px 10px;
|
270
|
+
border-radius: 3px; }
|
271
|
+
|
272
|
+
pre {
|
273
|
+
background-color: #f8f8f8;
|
274
|
+
border: 1px solid #cccccc;
|
275
|
+
font-size: 13px;
|
276
|
+
line-height: 19px;
|
277
|
+
overflow: auto;
|
278
|
+
padding: 6px 10px;
|
279
|
+
border-radius: 3px; }
|
280
|
+
pre code, pre tt {
|
281
|
+
background-color: transparent;
|
282
|
+
border: none; }
|
283
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'sinatra/base'
|
2
|
+
require 'httpclient'
|
3
|
+
require 'opener/webservice'
|
4
|
+
|
5
|
+
module Opener
  class POSTagger
    ##
    # Web front end of the POS tagger, built on Sinatra through the
    # `Webservice` base class.
    #
    # The class body only configures that base class: templates are looked up
    # in the `views` directory next to this file, {Opener::POSTagger} is
    # registered as the text processor and `input` is the accepted request
    # parameter.
    #
    class Server < Webservice
      set(:views, File.expand_path('../views', __FILE__))

      text_processor(POSTagger)
      accepted_params(:input)
    end # Server
  end # POSTagger
end # Opener
|
@@ -0,0 +1,163 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link type="text/css" rel="stylesheet" charset="UTF-8" href="markdown.css"/>
|
5
|
+
<title>POS Tagger Webservice</title>
|
6
|
+
</head>
|
7
|
+
<body>
|
8
|
+
<h1>POS Tagger Web Service</h1>
|
9
|
+
|
10
|
+
<h2>Example Usage</h2>
|
11
|
+
|
12
|
+
<p>
|
13
|
+
<pre>pos-tagger-server start</pre>
|
14
|
+
<pre>curl -d 'input=<?xml version="1.0" encoding="UTF-8" standalone="no"?><KAF version="v1.opener" xml:lang="en"><kafHeader><linguisticProcessors layer="text"><lp name="opennlp-en-tok" timestamp="2013-06-11T13:41:37Z" version="1.0"/><lp name="opennlp-en-sent" timestamp="2013-06-11T13:41:37Z" version="1.0"/></linguisticProcessors></kafHeader><text><wf length="4" offset="0" para="1" sent="1" wid="w1">this</wf><wf length="2" offset="5" para="1" sent="1" wid="w2">is</wf><wf length="2" offset="8" para="1" sent="1" wid="w3">an</wf><wf length="7" offset="11" para="1" sent="1" wid="w4">english</wf><wf length="4" offset="19" para="1" sent="1" wid="w5">text</wf></text></KAF>' http://localhost:9292 -XPOST</pre>
|
15
|
+
|
16
|
+
outputs:
|
17
|
+
|
18
|
+
<pre>
|
19
|
+
<?xml version='1.0' encoding='UTF-8'?>
|
20
|
+
<KAF version="v1.opener" xml:lang="en">
|
21
|
+
<kafHeader>
|
22
|
+
<linguisticProcessors layer="text">
|
23
|
+
<lp name="opennlp-en-tok" timestamp="2013-06-11T13:41:37Z" version="1.0"/>
|
24
|
+
<lp name="opennlp-en-sent" timestamp="2013-06-11T13:41:37Z" version="1.0"/>
|
25
|
+
</linguisticProcessors>
|
26
|
+
<linguisticProcessor layer="term">
|
27
|
+
<lp timestamp="2013-06-12T15:18:03CEST" version="1.0" name="Open nlp pos tagger"/>
|
28
|
+
</linguisticProcessor>
|
29
|
+
</kafHeader>
|
30
|
+
<text>
|
31
|
+
<wf length="4" offset="0" para="1" sent="1" wid="w1">this</wf>
|
32
|
+
<wf length="2" offset="5" para="1" sent="1" wid="w2">is</wf>
|
33
|
+
<wf length="2" offset="8" para="1" sent="1" wid="w3">an</wf>
|
34
|
+
<wf length="7" offset="11" para="1" sent="1" wid="w4">english</wf>
|
35
|
+
<wf length="4" offset="19" para="1" sent="1" wid="w5">text</wf>
|
36
|
+
</text>
|
37
|
+
<terms>
|
38
|
+
<term lemma="this" morphofeat="FM" pos="O" tid="t_1" type="open">
|
39
|
+
<span>
|
40
|
+
<target id="w1"/>
|
41
|
+
</span>
|
42
|
+
</term>
|
43
|
+
<term lemma="is" morphofeat="FM" pos="O" tid="t_2" type="open">
|
44
|
+
<span>
|
45
|
+
<target id="w2"/>
|
46
|
+
</span>
|
47
|
+
</term>
|
48
|
+
<term lemma="an" morphofeat="APPR" pos="P" tid="t_3" type="close">
|
49
|
+
<span>
|
50
|
+
<target id="w3"/>
|
51
|
+
</span>
|
52
|
+
</term>
|
53
|
+
<term lemma="english" morphofeat="FM" pos="O" tid="t_4" type="open">
|
54
|
+
<span>
|
55
|
+
<target id="w4"/>
|
56
|
+
</span>
|
57
|
+
</term>
|
58
|
+
<term lemma="text" morphofeat="FM" pos="O" tid="t_5" type="open">
|
59
|
+
<span>
|
60
|
+
<target id="w5"/>
|
61
|
+
</span>
|
62
|
+
</term>
|
63
|
+
</terms>
|
64
|
+
</KAF></pre>
|
65
|
+
</p>
|
66
|
+
|
67
|
+
<h2>Try the webservice</h2>
|
68
|
+
|
69
|
+
<p>* required</p>
|
70
|
+
<p>** When entering a value no response will be displayed in the browser.</p>
|
71
|
+
|
72
|
+
<form action="<%=url("/")%>" method="POST">
|
73
|
+
<div>
|
74
|
+
<label for="input">Type your text here*</label>
|
75
|
+
<br/>
|
76
|
+
|
77
|
+
<textarea name="input" id="input" rows="10" cols="50"></textarea>
|
78
|
+
</div>
|
79
|
+
|
80
|
+
<% 10.times do |t| %>
|
81
|
+
<div>
|
82
|
+
<label for="callbacks">Callback URL <%=t+1%>(**)</label>
|
83
|
+
<br />
|
84
|
+
|
85
|
+
<input id="callbacks" type="text" name="callbacks[]" />
|
86
|
+
</div>
|
87
|
+
<% end %>
|
88
|
+
|
89
|
+
|
90
|
+
<div>
|
91
|
+
<label for="error_callback">Error Callback</label>
|
92
|
+
<br />
|
93
|
+
|
94
|
+
<input id="error_callback" type="text" name="error_callback" />
|
95
|
+
</div>
|
96
|
+
<input type="submit" value="Submit" />
|
97
|
+
</form>
|
98
|
+
|
99
|
+
<h2>Actions</h2>
|
100
|
+
|
101
|
+
<p>
|
102
|
+
<dl>
|
103
|
+
<dt>POST /</dt>
|
104
|
+
<dd>Tag the input tokenized text. See arguments listing for more options.</dd>
|
105
|
+
<dt>GET /</dt>
|
106
|
+
<dd>Show this page</dd>
|
107
|
+
</dl>
|
108
|
+
</p>
|
109
|
+
|
110
|
+
<h2>Arguments</h2>
|
111
|
+
|
112
|
+
<p> The webservice takes the following arguments: </p>
|
113
|
+
<p>* required</p>
|
114
|
+
|
115
|
+
<dl>
|
116
|
+
<dt>input*</dt>
|
117
|
+
<dd>The input text in KAF format. Sample KAF input:</dd>
|
118
|
+
<pre>
|
119
|
+
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
120
|
+
<KAF version="v1.opener" xml:lang="en">
|
121
|
+
<kafHeader>
|
122
|
+
<linguisticProcessors layer="text">
|
123
|
+
<lp name="opennlp-en-tok" timestamp="2013-06-11T13:41:37Z" version="1.0"/>
|
124
|
+
<lp name="opennlp-en-sent" timestamp="2013-06-11T13:41:37Z" version="1.0"/>
|
125
|
+
</linguisticProcessors>
|
126
|
+
</kafHeader>
|
127
|
+
<text>
|
128
|
+
<wf length="4" offset="0" para="1" sent="1" wid="w1">this</wf>
|
129
|
+
<wf length="2" offset="5" para="1" sent="1" wid="w2">is</wf>
|
130
|
+
<wf length="2" offset="8" para="1" sent="1" wid="w3">an</wf>
|
131
|
+
<wf length="7" offset="11" para="1" sent="1" wid="w4">english</wf>
|
132
|
+
<wf length="4" offset="19" para="1" sent="1" wid="w5">text</wf>
|
133
|
+
</text>
|
134
|
+
</KAF></pre>
|
135
|
+
|
136
|
+
<dt>callbacks</dt>
|
137
|
+
<dd>
|
138
|
+
You can provide a list of callback urls. If you provide callback urls
|
139
|
+
the POS tagger will run as a background job and a callback
|
140
|
+
with the results will be performed (POST) to the first url in the callback
|
141
|
+
list. The other urls in callback list will be provided in the "callbacks"
|
142
|
+
argument.<br/><br/>
|
143
|
+
Using callback you can chain together several OpeNER webservices in
|
144
|
+
one call. The first, will call the second, which will call the third, etc.
|
145
|
+
See for more information the <a href="http://opener-project.github.io">
|
146
|
+
webservice documentation online</a>.
|
147
|
+
</dd>
|
148
|
+
<dt>error_callback</dt>
|
149
|
+
<dd>URL to notify if errors occur in the background process. The error
|
150
|
+
callback will do a POST with the error message in the 'error' field.</dd>
|
151
|
+
</dt>
|
152
|
+
|
153
|
+
|
154
|
+
|
155
|
+
</dl>
|
156
|
+
|
157
|
+
|
158
|
+
<p>
|
159
|
+
|
160
|
+
</p>
|
161
|
+
|
162
|
+
</body>
|
163
|
+
</html>
|
@@ -0,0 +1,15 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link type="text/css" rel="stylesheet" charset="UTF-8" href="markdown.css"/>
|
5
|
+
<title>POS Tagger Webservice</title>
|
6
|
+
</head>
|
7
|
+
<body>
|
8
|
+
<h1>Output URL</h1>
|
9
|
+
<p>
|
10
|
+
When ready, you can view the result
|
11
|
+
<a href="<%= output_url %>">here</a>
|
12
|
+
</p>
|
13
|
+
|
14
|
+
</body>
|
15
|
+
</html>
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require File.expand_path('../lib/opener/pos_tagger/version', __FILE__)
|
2
|
+
|
3
|
+
# Gem specification of the POS tagger wrapper. File lists are built from globs
# that are resolved against the current working directory.
Gem::Specification.new do |spec|
  spec.name                  = 'opener-pos-tagger'
  spec.version               = Opener::POSTagger::VERSION
  spec.authors               = ['development@olery.com']
  spec.summary               = 'Gem that wraps up the different existing pos-taggers'
  spec.description           = spec.summary
  spec.homepage              = 'http://opener-project.github.com/'
  spec.has_rdoc              = "yard"
  spec.required_ruby_version = ">= 1.9.2"

  # Only regular files are packaged; directories matched by the globs are
  # filtered out.
  candidates = Dir.glob(['lib/**/*', 'config.ru', '*.gemspec', 'README.md'])

  spec.files       = candidates.select { |path| File.file?(path) }
  spec.executables = Dir.glob('bin/*').map { |path| File.basename(path) }

  # Runtime dependencies, declared in the same order as before.
  [
    ['opener-pos-tagger-base'],
    ['opener-pos-tagger-en-es'],
    ['opener-webservice'],
    ['nokogiri'],
    ['sinatra', '~>1.4.2'],
    ['httpclient']
  ].each do |requirement|
    spec.add_dependency(*requirement)
  end

  # Development-only dependencies.
  ['rspec', 'cucumber', 'pry', 'rake'].each do |name|
    spec.add_development_dependency(name)
  end
end
|
35
|
+
|
metadata
ADDED
@@ -0,0 +1,197 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: opener-pos-tagger
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- development@olery.com
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-05-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: opener-pos-tagger-base
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: opener-pos-tagger-en-es
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: opener-webservice
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: nokogiri
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: sinatra
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 1.4.2
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.4.2
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: httpclient
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rspec
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: cucumber
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: pry
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: rake
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
description: Gem that wraps up the different existing pos-taggers
|
154
|
+
email:
|
155
|
+
executables:
|
156
|
+
- pos-tagger-server
|
157
|
+
- pos-tagger
|
158
|
+
extensions: []
|
159
|
+
extra_rdoc_files: []
|
160
|
+
files:
|
161
|
+
- README.md
|
162
|
+
- bin/pos-tagger
|
163
|
+
- bin/pos-tagger-server
|
164
|
+
- config.ru
|
165
|
+
- lib/opener/pos_tagger.rb
|
166
|
+
- lib/opener/pos_tagger/cli.rb
|
167
|
+
- lib/opener/pos_tagger/public/markdown.css
|
168
|
+
- lib/opener/pos_tagger/server.rb
|
169
|
+
- lib/opener/pos_tagger/version.rb
|
170
|
+
- lib/opener/pos_tagger/views/index.erb
|
171
|
+
- lib/opener/pos_tagger/views/result.erb
|
172
|
+
- opener-pos-tagger.gemspec
|
173
|
+
homepage: http://opener-project.github.com/
|
174
|
+
licenses: []
|
175
|
+
metadata: {}
|
176
|
+
post_install_message:
|
177
|
+
rdoc_options: []
|
178
|
+
require_paths:
|
179
|
+
- lib
|
180
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
181
|
+
requirements:
|
182
|
+
- - ">="
|
183
|
+
- !ruby/object:Gem::Version
|
184
|
+
version: 1.9.2
|
185
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
186
|
+
requirements:
|
187
|
+
- - ">="
|
188
|
+
- !ruby/object:Gem::Version
|
189
|
+
version: '0'
|
190
|
+
requirements: []
|
191
|
+
rubyforge_project:
|
192
|
+
rubygems_version: 2.2.2
|
193
|
+
signing_key:
|
194
|
+
specification_version: 4
|
195
|
+
summary: Gem that wraps up the different existing pos-taggers
|
196
|
+
test_files: []
|
197
|
+
has_rdoc: yard
|