opener-pos-tagger 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +164 -0
- data/bin/pos-tagger +7 -0
- data/bin/pos-tagger-server +10 -0
- data/config.ru +4 -0
- data/lib/opener/pos_tagger.rb +90 -0
- data/lib/opener/pos_tagger/cli.rb +73 -0
- data/lib/opener/pos_tagger/public/markdown.css +283 -0
- data/lib/opener/pos_tagger/server.rb +16 -0
- data/lib/opener/pos_tagger/version.rb +5 -0
- data/lib/opener/pos_tagger/views/index.erb +163 -0
- data/lib/opener/pos_tagger/views/result.erb +15 -0
- data/opener-pos-tagger.gemspec +35 -0
- metadata +197 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: ccc6c90ace4f3e79af9d820dd7b773ccffdd65fe
|
4
|
+
data.tar.gz: 3af96ef7ef65f6210ff076f3c8dc605e5394ec0f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 70f3208c340084a7fa7c5a7f1701c3aa4bdd39f7bbe43b47a652c6da09179eedf7d3a5ef1f5680f827c953e6063935af11ca343d10a38e7552f47b95a08773a6
|
7
|
+
data.tar.gz: a72e6d5d3cdd505179e9bac632ba9bd9cf125a34a2b579942c8a98b7d62f7ebb34cec01e50f90d4fd22a9257f6e44ccbe22798e35730f95fe381af0b15d2801a
|
data/README.md
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
POS-tagger
|
2
|
+
------------
|
3
|
+
|
4
|
+
Component that wraps the different existing POS Taggers based on OpenNLP.
|
5
|
+
|
6
|
+
### Confused by some terminology?
|
7
|
+
|
8
|
+
This software is part of a larger collection of natural language processing
|
9
|
+
tools known as "the OpeNER project". You can find more information about the
|
10
|
+
project at [the OpeNER portal](http://opener-project.github.io). There you can
|
11
|
+
also find references to terms like KAF (an XML standard to represent linguistic
|
12
|
+
annotations in texts), component, cores, scenarios and pipelines.
|
13
|
+
|
14
|
+
Quick Use Example
|
15
|
+
-----------------
|
16
|
+
|
17
|
+
Installing the pos-tagger can be done by executing:
|
18
|
+
|
19
|
+
gem install opener-pos-tagger
|
20
|
+
|
21
|
+
Please bear in mind that all components in OpeNER take KAF as an input and
|
22
|
+
output KAF by default.
|
23
|
+
|
24
|
+
### Command line interface
|
25
|
+
|
26
|
+
You should now be able to call the POS tagger as a regular shell
|
27
|
+
command: by its name. Once installed the gem normally sits in your path so you can call it directly from anywhere.
|
28
|
+
|
29
|
+
This application reads a KAF document from standard input and adds part-of-speech tags to it.
|
30
|
+
|
31
|
+
POS Tagging some text (assuming that the above text is in a file called *english.kaf*):
|
32
|
+
|
33
|
+
cat english.kaf | pos-tagger
|
34
|
+
|
35
|
+
Will result in
|
36
|
+
|
37
|
+
<?xml version='1.0' encoding='UTF-8'?>
|
38
|
+
<KAF version="v1.opener" xml:lang="en">
|
39
|
+
<kafHeader>
|
40
|
+
<linguisticProcessors layer="text">
|
41
|
+
<lp name="opennlp-en-tok" timestamp="2013-06-11T13:41:37Z" version="1.0"/>
|
42
|
+
<lp name="opennlp-en-sent" timestamp="2013-06-11T13:41:37Z" version="1.0"/>
|
43
|
+
</linguisticProcessors>
|
44
|
+
<linguisticProcessor layer="term">
|
45
|
+
<lp timestamp="2013-06-12T15:18:03CEST" version="1.0" name="Open nlp pos tagger"/>
|
46
|
+
</linguisticProcessor>
|
47
|
+
</kafHeader>
|
48
|
+
<text>
|
49
|
+
<wf length="4" offset="0" para="1" sent="1" wid="w1">this</wf>
|
50
|
+
<wf length="2" offset="5" para="1" sent="1" wid="w2">is</wf>
|
51
|
+
<wf length="2" offset="8" para="1" sent="1" wid="w3">an</wf>
|
52
|
+
<wf length="7" offset="11" para="1" sent="1" wid="w4">english</wf>
|
53
|
+
<wf length="4" offset="19" para="1" sent="1" wid="w5">text</wf>
|
54
|
+
</text>
|
55
|
+
<terms>
|
56
|
+
<term lemma="this" morphofeat="FM" pos="O" tid="t_1" type="open">
|
57
|
+
<span>
|
58
|
+
<target id="w1"/>
|
59
|
+
</span>
|
60
|
+
</term>
|
61
|
+
<term lemma="is" morphofeat="FM" pos="O" tid="t_2" type="open">
|
62
|
+
<span>
|
63
|
+
<target id="w2"/>
|
64
|
+
</span>
|
65
|
+
</term>
|
66
|
+
<term lemma="an" morphofeat="APPR" pos="P" tid="t_3" type="close">
|
67
|
+
<span>
|
68
|
+
<target id="w3"/>
|
69
|
+
</span>
|
70
|
+
</term>
|
71
|
+
<term lemma="english" morphofeat="FM" pos="O" tid="t_4" type="open">
|
72
|
+
<span>
|
73
|
+
<target id="w4"/>
|
74
|
+
</span>
|
75
|
+
</term>
|
76
|
+
<term lemma="text" morphofeat="FM" pos="O" tid="t_5" type="open">
|
77
|
+
<span>
|
78
|
+
<target id="w5"/>
|
79
|
+
</span>
|
80
|
+
</term>
|
81
|
+
</terms>
|
82
|
+
</KAF>
|
83
|
+
|
84
|
+
### Webservices
|
85
|
+
|
86
|
+
You can launch a POS tagging webservice by executing:
|
87
|
+
|
88
|
+
pos-tagger-server
|
89
|
+
|
90
|
+
This will launch a mini webserver with the webservice. It defaults to port 9292,
|
91
|
+
so you can access it at <http://localhost:9292>.
|
92
|
+
|
93
|
+
To launch it on a different port provide the `-p [port-number]` option like
|
94
|
+
this:
|
95
|
+
|
96
|
+
pos-tagger-server -p 1234
|
97
|
+
|
98
|
+
It then launches at <http://localhost:1234>
|
99
|
+
|
100
|
+
Documentation on the Webservice is provided by surfing to the urls provided
|
101
|
+
above. For more information on how to launch a webservice run the command with
|
102
|
+
the ```-h``` option.
|
103
|
+
|
104
|
+
|
105
|
+
### Daemon
|
106
|
+
|
107
|
+
Last but not least the POS tagger comes shipped with a daemon that
|
108
|
+
can read jobs from (and write jobs to) Amazon SQS queues. For more
|
109
|
+
information type:
|
110
|
+
|
111
|
+
pos-tagger-daemon -h
|
112
|
+
|
113
|
+
Description of dependencies
|
114
|
+
---------------------------
|
115
|
+
|
116
|
+
This component runs best if you run it in an environment suited for OpeNER
|
117
|
+
components. You can find an installation guide and helper tools in the [OpeNER
|
118
|
+
installer](https://github.com/opener-project/opener-installer) and [an
|
119
|
+
installation guide on the Opener
|
120
|
+
Website](http://opener-project.github.io/getting-started/how-to/local-installation.html).
|
121
|
+
|
122
|
+
At least you need the following system setup:
|
123
|
+
|
124
|
+
### Dependencies for normal use:
|
125
|
+
|
126
|
+
* JRuby (1.7.9+)
|
127
|
+
* Java 1.7 or newer (There are problems with encoding in older versions).
|
128
|
+
|
129
|
+
### Dependencies if you want to modify the component:
|
130
|
+
|
131
|
+
* Maven (for building the Gem)
|
132
|
+
|
133
|
+
Language Extension
|
134
|
+
------------------
|
135
|
+
|
136
|
+
TODO
|
137
|
+
|
138
|
+
The Core
|
139
|
+
--------
|
140
|
+
|
141
|
+
The component is a fat wrapper around the actual language technology core. You
|
142
|
+
can find the core technologies in the following repositories: <https://github.com/opener-project/?query=pos>
|
143
|
+
|
144
|
+
Where to go from here
|
145
|
+
---------------------
|
146
|
+
|
147
|
+
* Check [the project website](http://opener-project.github.io)
|
148
|
+
* [Check out the webservice](http://opener.olery.com/pos-tagger)
|
149
|
+
|
150
|
+
Report problem/Get help
|
151
|
+
-----------------------
|
152
|
+
|
153
|
+
If you encounter problems, please email support@opener-project.eu or leave an
|
154
|
+
issue in the [issue tracker](https://github.com/opener-project/pos-tagger/issues).
|
155
|
+
|
156
|
+
|
157
|
+
Contributing
|
158
|
+
------------
|
159
|
+
|
160
|
+
1. Fork it ( http://github.com/opener-project/pos-tagger/fork )
|
161
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
162
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
163
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
164
|
+
5. Create new Pull Request
|
data/bin/pos-tagger-server
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/opener/pos_tagger/server'
|
4
|
+
|
5
|
+
# Without calling `Rack::Server#options` manually the CLI arguments will never
|
6
|
+
# be passed, thus the application can't be specified as a constructor argument.
|
7
|
+
server = Rack::Server.new
|
8
|
+
server.options[:config] = File.expand_path('../../config.ru', __FILE__)
|
9
|
+
|
10
|
+
server.start
|
data/lib/opener/pos_tagger.rb
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'opener/pos_taggers/base'
|
2
|
+
require 'opener/pos_taggers/en'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'open3'
|
5
|
+
require 'optparse'
|
6
|
+
|
7
|
+
require_relative 'pos_tagger/version'
|
8
|
+
require_relative 'pos_tagger/cli'
|
9
|
+
|
10
|
+
module Opener
|
11
|
+
##
|
12
|
+
# Primary POS tagger class that delegates work to the various POS tagging
|
13
|
+
# kernels.
|
14
|
+
#
|
15
|
+
# @!attribute [r] options
|
16
|
+
# @return [Hash]
|
17
|
+
#
|
18
|
+
class POSTagger
|
19
|
+
attr_reader :options
|
20
|
+
|
21
|
+
##
|
22
|
+
# Hash containing the default options to use.
|
23
|
+
#
|
24
|
+
# @return [Hash]
|
25
|
+
#
|
26
|
+
DEFAULT_OPTIONS = {
|
27
|
+
:args => []
|
28
|
+
}.freeze
|
29
|
+
|
30
|
+
##
|
31
|
+
# @param [Hash] options
|
32
|
+
#
|
33
|
+
# @option options [Array] :args Arbitrary arguments to pass to the
|
34
|
+
# underlying kernel.
|
35
|
+
#
|
36
|
+
def initialize(options = {})
|
37
|
+
@options = DEFAULT_OPTIONS.merge(options)
|
38
|
+
end
|
39
|
+
|
40
|
+
##
|
41
|
+
# Processes the input and returns an Array containing the output of STDOUT,
|
42
|
+
# STDERR and an object containing process information.
|
43
|
+
#
|
44
|
+
# @param [String] input The input to process.
|
45
|
+
# @return [Array]
|
46
|
+
#
|
47
|
+
def run(input)
|
48
|
+
language = language_from_kaf(input)
|
49
|
+
|
50
|
+
unless valid_language?(language)
|
51
|
+
raise ArgumentError, "The specified language (#{language}) is invalid"
|
52
|
+
end
|
53
|
+
|
54
|
+
kernel = language_constant(language).new(:args => options[:args])
|
55
|
+
|
56
|
+
return kernel.run(input)
|
57
|
+
end
|
58
|
+
|
59
|
+
alias tag run
|
60
|
+
|
61
|
+
protected
|
62
|
+
|
63
|
+
##
|
64
|
+
# Extracts the language from a KAF document.
|
65
|
+
#
|
66
|
+
# @param [String] input
|
67
|
+
# @return [String]
|
68
|
+
#
|
69
|
+
def language_from_kaf(input)
|
70
|
+
reader = Nokogiri::XML::Reader(input)
|
71
|
+
|
72
|
+
return reader.read.lang
|
73
|
+
end
|
74
|
+
|
75
|
+
##
|
76
|
+
# @param [String] language
|
77
|
+
# @return [Class]
|
78
|
+
#
|
79
|
+
def language_constant(language)
|
80
|
+
return language && POSTaggers.const_get(language.upcase)
|
81
|
+
end
|
82
|
+
|
83
|
+
##
|
84
|
+
# @return [TrueClass|FalseClass]
|
85
|
+
#
|
86
|
+
def valid_language?(language)
|
87
|
+
return Opener::POSTaggers.const_defined?(language.upcase)
|
88
|
+
end
|
89
|
+
end # POSTagger
|
90
|
+
end # Opener
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
|
3
|
+
module Opener
|
4
|
+
class POSTagger
|
5
|
+
##
|
6
|
+
# CLI wrapper around {Opener::POSTagger} using OptionParser.
|
7
|
+
#
|
8
|
+
# @!attribute [r] options
|
9
|
+
# @return [Hash]
|
10
|
+
# @!attribute [r] option_parser
|
11
|
+
# @return [OptionParser]
|
12
|
+
#
|
13
|
+
class CLI
|
14
|
+
attr_reader :options, :option_parser
|
15
|
+
|
16
|
+
##
|
17
|
+
# @param [Hash] options
|
18
|
+
#
|
19
|
+
def initialize(options = {})
|
20
|
+
@options = DEFAULT_OPTIONS.merge(options)
|
21
|
+
|
22
|
+
@option_parser = ::OptionParser.new do |opts|
|
23
|
+
opts.program_name = 'pos-tagger'
|
24
|
+
opts.summary_indent = ' '
|
25
|
+
|
26
|
+
opts.on('-h', '--help', 'Shows this help message') do
|
27
|
+
show_help
|
28
|
+
end
|
29
|
+
|
30
|
+
opts.on('-v', '--version', 'Shows the current version') do
|
31
|
+
show_version
|
32
|
+
end
|
33
|
+
|
34
|
+
opts.separator <<-EOF
|
35
|
+
|
36
|
+
Examples:
|
37
|
+
|
38
|
+
cat example.kaf | #{opts.program_name}
|
39
|
+
EOF
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
##
|
44
|
+
# @param [String] input
|
45
|
+
#
|
46
|
+
def run(input)
|
47
|
+
option_parser.parse!(options[:args])
|
48
|
+
|
49
|
+
tagger = POSTagger.new(options)
|
50
|
+
|
51
|
+
stdout = tagger.run(input)
|
52
|
+
|
53
|
+
puts stdout
|
54
|
+
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
##
|
59
|
+
# Shows the help message and exits the program.
|
60
|
+
#
|
61
|
+
def show_help
|
62
|
+
abort option_parser.to_s
|
63
|
+
end
|
64
|
+
|
65
|
+
##
|
66
|
+
# Shows the version and exits the program.
|
67
|
+
#
|
68
|
+
def show_version
|
69
|
+
abort "#{option_parser.program_name} v#{VERSION} on #{RUBY_DESCRIPTION}"
|
70
|
+
end
|
71
|
+
end # CLI
|
72
|
+
end # POSTagger
|
73
|
+
end # Opener
|
@@ -0,0 +1,283 @@
|
|
1
|
+
input[type="text"], textarea
|
2
|
+
{
|
3
|
+
width: 500px;
|
4
|
+
}
|
5
|
+
|
6
|
+
body {
|
7
|
+
font-family: Helvetica, arial, sans-serif;
|
8
|
+
font-size: 14px;
|
9
|
+
line-height: 1.6;
|
10
|
+
padding-top: 10px;
|
11
|
+
padding-bottom: 10px;
|
12
|
+
background-color: white;
|
13
|
+
padding: 30px; }
|
14
|
+
|
15
|
+
body > *:first-child {
|
16
|
+
margin-top: 0 !important; }
|
17
|
+
body > *:last-child {
|
18
|
+
margin-bottom: 0 !important; }
|
19
|
+
|
20
|
+
a {
|
21
|
+
color: #4183C4; }
|
22
|
+
a.absent {
|
23
|
+
color: #cc0000; }
|
24
|
+
a.anchor {
|
25
|
+
display: block;
|
26
|
+
padding-left: 30px;
|
27
|
+
margin-left: -30px;
|
28
|
+
cursor: pointer;
|
29
|
+
position: absolute;
|
30
|
+
top: 0;
|
31
|
+
left: 0;
|
32
|
+
bottom: 0; }
|
33
|
+
|
34
|
+
h1, h2, h3, h4, h5, h6 {
|
35
|
+
margin: 20px 0 10px;
|
36
|
+
padding: 0;
|
37
|
+
font-weight: bold;
|
38
|
+
-webkit-font-smoothing: antialiased;
|
39
|
+
cursor: text;
|
40
|
+
position: relative; }
|
41
|
+
|
42
|
+
h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor, h5:hover a.anchor, h6:hover a.anchor {
|
43
|
+
background: url("../../images/modules/styleguide/para.png") no-repeat 10px center;
|
44
|
+
text-decoration: none; }
|
45
|
+
|
46
|
+
h1 tt, h1 code {
|
47
|
+
font-size: inherit; }
|
48
|
+
|
49
|
+
h2 tt, h2 code {
|
50
|
+
font-size: inherit; }
|
51
|
+
|
52
|
+
h3 tt, h3 code {
|
53
|
+
font-size: inherit; }
|
54
|
+
|
55
|
+
h4 tt, h4 code {
|
56
|
+
font-size: inherit; }
|
57
|
+
|
58
|
+
h5 tt, h5 code {
|
59
|
+
font-size: inherit; }
|
60
|
+
|
61
|
+
h6 tt, h6 code {
|
62
|
+
font-size: inherit; }
|
63
|
+
|
64
|
+
h1 {
|
65
|
+
font-size: 28px;
|
66
|
+
color: black; }
|
67
|
+
|
68
|
+
h2 {
|
69
|
+
font-size: 24px;
|
70
|
+
border-bottom: 1px solid #cccccc;
|
71
|
+
color: black; }
|
72
|
+
|
73
|
+
h3 {
|
74
|
+
font-size: 18px; }
|
75
|
+
|
76
|
+
h4 {
|
77
|
+
font-size: 16px; }
|
78
|
+
|
79
|
+
h5 {
|
80
|
+
font-size: 14px; }
|
81
|
+
|
82
|
+
h6 {
|
83
|
+
color: #777777;
|
84
|
+
font-size: 14px; }
|
85
|
+
|
86
|
+
p, blockquote, ul, ol, dl, li, table, pre {
|
87
|
+
margin: 15px 0; }
|
88
|
+
|
89
|
+
hr {
|
90
|
+
background: transparent url("../../images/modules/pulls/dirty-shade.png") repeat-x 0 0;
|
91
|
+
border: 0 none;
|
92
|
+
color: #cccccc;
|
93
|
+
height: 4px;
|
94
|
+
padding: 0; }
|
95
|
+
|
96
|
+
body > h2:first-child {
|
97
|
+
margin-top: 0;
|
98
|
+
padding-top: 0; }
|
99
|
+
body > h1:first-child {
|
100
|
+
margin-top: 0;
|
101
|
+
padding-top: 0; }
|
102
|
+
body > h1:first-child + h2 {
|
103
|
+
margin-top: 0;
|
104
|
+
padding-top: 0; }
|
105
|
+
body > h3:first-child, body > h4:first-child, body > h5:first-child, body > h6:first-child {
|
106
|
+
margin-top: 0;
|
107
|
+
padding-top: 0; }
|
108
|
+
|
109
|
+
a:first-child h1, a:first-child h2, a:first-child h3, a:first-child h4, a:first-child h5, a:first-child h6 {
|
110
|
+
margin-top: 0;
|
111
|
+
padding-top: 0; }
|
112
|
+
|
113
|
+
h1 p, h2 p, h3 p, h4 p, h5 p, h6 p {
|
114
|
+
margin-top: 0; }
|
115
|
+
|
116
|
+
li p.first {
|
117
|
+
display: inline-block; }
|
118
|
+
|
119
|
+
ul, ol {
|
120
|
+
padding-left: 30px; }
|
121
|
+
|
122
|
+
ul :first-child, ol :first-child {
|
123
|
+
margin-top: 0; }
|
124
|
+
|
125
|
+
ul :last-child, ol :last-child {
|
126
|
+
margin-bottom: 0; }
|
127
|
+
|
128
|
+
dl {
|
129
|
+
padding: 0; }
|
130
|
+
dl dt {
|
131
|
+
font-size: 14px;
|
132
|
+
font-weight: bold;
|
133
|
+
font-style: italic;
|
134
|
+
padding: 0;
|
135
|
+
margin: 15px 0 5px; }
|
136
|
+
dl dt:first-child {
|
137
|
+
padding: 0; }
|
138
|
+
dl dt > :first-child {
|
139
|
+
margin-top: 0; }
|
140
|
+
dl dt > :last-child {
|
141
|
+
margin-bottom: 0; }
|
142
|
+
dl dd {
|
143
|
+
margin: 0 0 15px;
|
144
|
+
padding: 0 15px; }
|
145
|
+
dl dd > :first-child {
|
146
|
+
margin-top: 0; }
|
147
|
+
dl dd > :last-child {
|
148
|
+
margin-bottom: 0; }
|
149
|
+
|
150
|
+
blockquote {
|
151
|
+
border-left: 4px solid #dddddd;
|
152
|
+
padding: 0 15px;
|
153
|
+
color: #777777; }
|
154
|
+
blockquote > :first-child {
|
155
|
+
margin-top: 0; }
|
156
|
+
blockquote > :last-child {
|
157
|
+
margin-bottom: 0; }
|
158
|
+
|
159
|
+
table {
|
160
|
+
padding: 0; }
|
161
|
+
table tr {
|
162
|
+
border-top: 1px solid #cccccc;
|
163
|
+
background-color: white;
|
164
|
+
margin: 0;
|
165
|
+
padding: 0; }
|
166
|
+
table tr:nth-child(2n) {
|
167
|
+
background-color: #f8f8f8; }
|
168
|
+
table tr th {
|
169
|
+
font-weight: bold;
|
170
|
+
border: 1px solid #cccccc;
|
171
|
+
text-align: left;
|
172
|
+
margin: 0;
|
173
|
+
padding: 6px 13px; }
|
174
|
+
table tr td {
|
175
|
+
border: 1px solid #cccccc;
|
176
|
+
text-align: left;
|
177
|
+
margin: 0;
|
178
|
+
padding: 6px 13px; }
|
179
|
+
table tr th :first-child, table tr td :first-child {
|
180
|
+
margin-top: 0; }
|
181
|
+
table tr th :last-child, table tr td :last-child {
|
182
|
+
margin-bottom: 0; }
|
183
|
+
|
184
|
+
img {
|
185
|
+
max-width: 100%; }
|
186
|
+
|
187
|
+
span.frame {
|
188
|
+
display: block;
|
189
|
+
overflow: hidden; }
|
190
|
+
span.frame > span {
|
191
|
+
border: 1px solid #dddddd;
|
192
|
+
display: block;
|
193
|
+
float: left;
|
194
|
+
overflow: hidden;
|
195
|
+
margin: 13px 0 0;
|
196
|
+
padding: 7px;
|
197
|
+
width: auto; }
|
198
|
+
span.frame span img {
|
199
|
+
display: block;
|
200
|
+
float: left; }
|
201
|
+
span.frame span span {
|
202
|
+
clear: both;
|
203
|
+
color: #333333;
|
204
|
+
display: block;
|
205
|
+
padding: 5px 0 0; }
|
206
|
+
span.align-center {
|
207
|
+
display: block;
|
208
|
+
overflow: hidden;
|
209
|
+
clear: both; }
|
210
|
+
span.align-center > span {
|
211
|
+
display: block;
|
212
|
+
overflow: hidden;
|
213
|
+
margin: 13px auto 0;
|
214
|
+
text-align: center; }
|
215
|
+
span.align-center span img {
|
216
|
+
margin: 0 auto;
|
217
|
+
text-align: center; }
|
218
|
+
span.align-right {
|
219
|
+
display: block;
|
220
|
+
overflow: hidden;
|
221
|
+
clear: both; }
|
222
|
+
span.align-right > span {
|
223
|
+
display: block;
|
224
|
+
overflow: hidden;
|
225
|
+
margin: 13px 0 0;
|
226
|
+
text-align: right; }
|
227
|
+
span.align-right span img {
|
228
|
+
margin: 0;
|
229
|
+
text-align: right; }
|
230
|
+
span.float-left {
|
231
|
+
display: block;
|
232
|
+
margin-right: 13px;
|
233
|
+
overflow: hidden;
|
234
|
+
float: left; }
|
235
|
+
span.float-left span {
|
236
|
+
margin: 13px 0 0; }
|
237
|
+
span.float-right {
|
238
|
+
display: block;
|
239
|
+
margin-left: 13px;
|
240
|
+
overflow: hidden;
|
241
|
+
float: right; }
|
242
|
+
span.float-right > span {
|
243
|
+
display: block;
|
244
|
+
overflow: hidden;
|
245
|
+
margin: 13px auto 0;
|
246
|
+
text-align: right; }
|
247
|
+
|
248
|
+
code, tt {
|
249
|
+
margin: 0 2px;
|
250
|
+
padding: 0 5px;
|
251
|
+
white-space: nowrap;
|
252
|
+
border: 1px solid #eaeaea;
|
253
|
+
background-color: #f8f8f8;
|
254
|
+
border-radius: 3px; }
|
255
|
+
|
256
|
+
pre code {
|
257
|
+
margin: 0;
|
258
|
+
padding: 0;
|
259
|
+
white-space: pre;
|
260
|
+
border: none;
|
261
|
+
background: transparent; }
|
262
|
+
|
263
|
+
.highlight pre {
|
264
|
+
background-color: #f8f8f8;
|
265
|
+
border: 1px solid #cccccc;
|
266
|
+
font-size: 13px;
|
267
|
+
line-height: 19px;
|
268
|
+
overflow: auto;
|
269
|
+
padding: 6px 10px;
|
270
|
+
border-radius: 3px; }
|
271
|
+
|
272
|
+
pre {
|
273
|
+
background-color: #f8f8f8;
|
274
|
+
border: 1px solid #cccccc;
|
275
|
+
font-size: 13px;
|
276
|
+
line-height: 19px;
|
277
|
+
overflow: auto;
|
278
|
+
padding: 6px 10px;
|
279
|
+
border-radius: 3px; }
|
280
|
+
pre code, pre tt {
|
281
|
+
background-color: transparent;
|
282
|
+
border: none; }
|
283
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'sinatra/base'
|
2
|
+
require 'httpclient'
|
3
|
+
require 'opener/webservice'
|
4
|
+
|
5
|
+
module Opener
|
6
|
+
class POSTagger
|
7
|
+
##
|
8
|
+
# POS Tagger server powered by Sinatra.
|
9
|
+
#
|
10
|
+
class Server < Webservice
|
11
|
+
set :views, File.expand_path('../views', __FILE__)
|
12
|
+
text_processor POSTagger
|
13
|
+
accepted_params :input
|
14
|
+
end # Server
|
15
|
+
end # POSTagger
|
16
|
+
end # Opener
|
@@ -0,0 +1,163 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link type="text/css" rel="stylesheet" charset="UTF-8" href="markdown.css"/>
|
5
|
+
<title>POS Tagger Webservice</title>
|
6
|
+
</head>
|
7
|
+
<body>
|
8
|
+
<h1>POS Tagger Web Service</h1>
|
9
|
+
|
10
|
+
<h2>Example Usage</h2>
|
11
|
+
|
12
|
+
<p>
|
13
|
+
<pre>pos-tagger-server start</pre>
|
14
|
+
<pre>curl -d 'input=<?xml version="1.0" encoding="UTF-8" standalone="no"?><KAF version="v1.opener" xml:lang="en"><kafHeader><linguisticProcessors layer="text"><lp name="opennlp-en-tok" timestamp="2013-06-11T13:41:37Z" version="1.0"/><lp name="opennlp-en-sent" timestamp="2013-06-11T13:41:37Z" version="1.0"/></linguisticProcessors></kafHeader><text><wf length="4" offset="0" para="1" sent="1" wid="w1">this</wf><wf length="2" offset="5" para="1" sent="1" wid="w2">is</wf><wf length="2" offset="8" para="1" sent="1" wid="w3">an</wf><wf length="7" offset="11" para="1" sent="1" wid="w4">english</wf><wf length="4" offset="19" para="1" sent="1" wid="w5">text</wf></text></KAF>' http://localhost:9292 -XPOST</pre>
|
15
|
+
|
16
|
+
outputs:
|
17
|
+
|
18
|
+
<pre>
|
19
|
+
<?xml version='1.0' encoding='UTF-8'?>
|
20
|
+
<KAF version="v1.opener" xml:lang="en">
|
21
|
+
<kafHeader>
|
22
|
+
<linguisticProcessors layer="text">
|
23
|
+
<lp name="opennlp-en-tok" timestamp="2013-06-11T13:41:37Z" version="1.0"/>
|
24
|
+
<lp name="opennlp-en-sent" timestamp="2013-06-11T13:41:37Z" version="1.0"/>
|
25
|
+
</linguisticProcessors>
|
26
|
+
<linguisticProcessor layer="term">
|
27
|
+
<lp timestamp="2013-06-12T15:18:03CEST" version="1.0" name="Open nlp pos tagger"/>
|
28
|
+
</linguisticProcessor>
|
29
|
+
</kafHeader>
|
30
|
+
<text>
|
31
|
+
<wf length="4" offset="0" para="1" sent="1" wid="w1">this</wf>
|
32
|
+
<wf length="2" offset="5" para="1" sent="1" wid="w2">is</wf>
|
33
|
+
<wf length="2" offset="8" para="1" sent="1" wid="w3">an</wf>
|
34
|
+
<wf length="7" offset="11" para="1" sent="1" wid="w4">english</wf>
|
35
|
+
<wf length="4" offset="19" para="1" sent="1" wid="w5">text</wf>
|
36
|
+
</text>
|
37
|
+
<terms>
|
38
|
+
<term lemma="this" morphofeat="FM" pos="O" tid="t_1" type="open">
|
39
|
+
<span>
|
40
|
+
<target id="w1"/>
|
41
|
+
</span>
|
42
|
+
</term>
|
43
|
+
<term lemma="is" morphofeat="FM" pos="O" tid="t_2" type="open">
|
44
|
+
<span>
|
45
|
+
<target id="w2"/>
|
46
|
+
</span>
|
47
|
+
</term>
|
48
|
+
<term lemma="an" morphofeat="APPR" pos="P" tid="t_3" type="close">
|
49
|
+
<span>
|
50
|
+
<target id="w3"/>
|
51
|
+
</span>
|
52
|
+
</term>
|
53
|
+
<term lemma="english" morphofeat="FM" pos="O" tid="t_4" type="open">
|
54
|
+
<span>
|
55
|
+
<target id="w4"/>
|
56
|
+
</span>
|
57
|
+
</term>
|
58
|
+
<term lemma="text" morphofeat="FM" pos="O" tid="t_5" type="open">
|
59
|
+
<span>
|
60
|
+
<target id="w5"/>
|
61
|
+
</span>
|
62
|
+
</term>
|
63
|
+
</terms>
|
64
|
+
</KAF></pre>
|
65
|
+
</p>
|
66
|
+
|
67
|
+
<h2>Try the webservice</h2>
|
68
|
+
|
69
|
+
<p>* required</p>
|
70
|
+
<p>** When entering a value no response will be displayed in the browser.</p>
|
71
|
+
|
72
|
+
<form action="<%=url("/")%>" method="POST">
|
73
|
+
<div>
|
74
|
+
<label for="text">Type your text here*</label>
|
75
|
+
<br/>
|
76
|
+
|
77
|
+
<textarea name="input" id="text" rows="10" cols="50"></textarea>
|
78
|
+
</div>
|
79
|
+
|
80
|
+
<% 10.times do |t| %>
|
81
|
+
<div>
|
82
|
+
<label for="callbacks">Callback URL <%=t+1%>(**)</label>
|
83
|
+
<br />
|
84
|
+
|
85
|
+
<input id="callbacks" type="text" name="callbacks[]" />
|
86
|
+
</div>
|
87
|
+
<% end %>
|
88
|
+
|
89
|
+
|
90
|
+
<div>
|
91
|
+
<label for="error_callback">Error Callback</label>
|
92
|
+
<br />
|
93
|
+
|
94
|
+
<input id="error_callback" type="text" name="error_callback" />
|
95
|
+
</div>
|
96
|
+
<input type="submit" value="Submit" />
|
97
|
+
</form>
|
98
|
+
|
99
|
+
<h2>Actions</h2>
|
100
|
+
|
101
|
+
<p>
|
102
|
+
<dl>
|
103
|
+
<dt>POST /</dt>
|
104
|
+
<dd>Tag the input tokenized text. See arguments listing for more options.</dd>
|
105
|
+
<dt>GET /</dt>
|
106
|
+
<dd>Show this page</dd>
|
107
|
+
</dl>
|
108
|
+
</p>
|
109
|
+
|
110
|
+
<h2>Arguments</h2>
|
111
|
+
|
112
|
+
<p> The webservice takes the following arguments: </p>
|
113
|
+
<p>* required</p>
|
114
|
+
|
115
|
+
<dl>
|
116
|
+
<dt>input*</dt>
|
117
|
+
<dd>The input text in KAF format. Sample KAF input:</dd>
|
118
|
+
<pre>
|
119
|
+
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
120
|
+
<KAF version="v1.opener" xml:lang="en">
|
121
|
+
<kafHeader>
|
122
|
+
<linguisticProcessors layer="text">
|
123
|
+
<lp name="opennlp-en-tok" timestamp="2013-06-11T13:41:37Z" version="1.0"/>
|
124
|
+
<lp name="opennlp-en-sent" timestamp="2013-06-11T13:41:37Z" version="1.0"/>
|
125
|
+
</linguisticProcessors>
|
126
|
+
</kafHeader>
|
127
|
+
<text>
|
128
|
+
<wf length="4" offset="0" para="1" sent="1" wid="w1">this</wf>
|
129
|
+
<wf length="2" offset="5" para="1" sent="1" wid="w2">is</wf>
|
130
|
+
<wf length="2" offset="8" para="1" sent="1" wid="w3">an</wf>
|
131
|
+
<wf length="7" offset="11" para="1" sent="1" wid="w4">english</wf>
|
132
|
+
<wf length="4" offset="19" para="1" sent="1" wid="w5">text</wf>
|
133
|
+
</text>
|
134
|
+
</KAF></pre>
|
135
|
+
|
136
|
+
<dt>callbacks</dt>
|
137
|
+
<dd>
|
138
|
+
You can provide a list of callback urls. If you provide callback urls
|
139
|
+
the POS tagger will run as a background job and a callback
|
140
|
+
with the results will be performed (POST) to the first url in the callback
|
141
|
+
list. The other urls in callback list will be provided in the "callbacks"
|
142
|
+
argument.<br/><br/>
|
143
|
+
Using callback you can chain together several OpeNER webservices in
|
144
|
+
one call. The first, will call the second, which will call the third, etc.
|
145
|
+
See for more information the <a href="http://opener-project.github.io">
|
146
|
+
webservice documentation online</a>.
|
147
|
+
</dd>
|
148
|
+
<dt>error_callback</dt>
|
149
|
+
<dd>URL to notify if errors occur in the background process. The error
|
150
|
+
callback will do a POST with the error message in the 'error' field.</dd>
|
151
|
+
</dt>
|
152
|
+
|
153
|
+
|
154
|
+
|
155
|
+
</dl>
|
156
|
+
|
157
|
+
|
158
|
+
<p>
|
159
|
+
|
160
|
+
</p>
|
161
|
+
|
162
|
+
</body>
|
163
|
+
</html>
|
@@ -0,0 +1,15 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link type="text/css" rel="stylesheet" charset="UTF-8" href="markdown.css"/>
|
5
|
+
<title>POS Tagger Webservice</title>
|
6
|
+
</head>
|
7
|
+
<body>
|
8
|
+
<h1>Output URL</h1>
|
9
|
+
<p>
|
10
|
+
When ready, you can view the result
|
11
|
+
<a href="<%= output_url %>">here</a>
|
12
|
+
</p>
|
13
|
+
|
14
|
+
</body>
|
15
|
+
</html>
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require File.expand_path('../lib/opener/pos_tagger/version', __FILE__)
|
2
|
+
|
3
|
+
Gem::Specification.new do |gem|
|
4
|
+
gem.name = 'opener-pos-tagger'
|
5
|
+
gem.version = Opener::POSTagger::VERSION
|
6
|
+
gem.authors = ['development@olery.com']
|
7
|
+
gem.summary = 'Gem that wraps up the different existing pos-taggers'
|
8
|
+
gem.description = gem.summary
|
9
|
+
gem.homepage = 'http://opener-project.github.com/'
|
10
|
+
gem.has_rdoc = "yard"
|
11
|
+
gem.required_ruby_version = ">= 1.9.2"
|
12
|
+
|
13
|
+
gem.files = Dir.glob([
|
14
|
+
'lib/**/*',
|
15
|
+
'config.ru',
|
16
|
+
'*.gemspec',
|
17
|
+
'README.md'
|
18
|
+
]).select { |file| File.file?(file) }
|
19
|
+
|
20
|
+
gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
|
21
|
+
|
22
|
+
gem.add_dependency 'opener-pos-tagger-base'
|
23
|
+
gem.add_dependency 'opener-pos-tagger-en-es'
|
24
|
+
gem.add_dependency 'opener-webservice'
|
25
|
+
|
26
|
+
gem.add_dependency 'nokogiri'
|
27
|
+
gem.add_dependency 'sinatra', '~>1.4.2'
|
28
|
+
gem.add_dependency 'httpclient'
|
29
|
+
|
30
|
+
gem.add_development_dependency 'rspec'
|
31
|
+
gem.add_development_dependency 'cucumber'
|
32
|
+
gem.add_development_dependency 'pry'
|
33
|
+
gem.add_development_dependency 'rake'
|
34
|
+
end
|
35
|
+
|
metadata
ADDED
@@ -0,0 +1,197 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: opener-pos-tagger
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- development@olery.com
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-05-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: opener-pos-tagger-base
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: opener-pos-tagger-en-es
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: opener-webservice
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: nokogiri
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: sinatra
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 1.4.2
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.4.2
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: httpclient
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rspec
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: cucumber
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: pry
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: rake
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
description: Gem that wraps up the different existing pos-taggers
|
154
|
+
email:
|
155
|
+
executables:
|
156
|
+
- pos-tagger-server
|
157
|
+
- pos-tagger
|
158
|
+
extensions: []
|
159
|
+
extra_rdoc_files: []
|
160
|
+
files:
|
161
|
+
- README.md
|
162
|
+
- bin/pos-tagger
|
163
|
+
- bin/pos-tagger-server
|
164
|
+
- config.ru
|
165
|
+
- lib/opener/pos_tagger.rb
|
166
|
+
- lib/opener/pos_tagger/cli.rb
|
167
|
+
- lib/opener/pos_tagger/public/markdown.css
|
168
|
+
- lib/opener/pos_tagger/server.rb
|
169
|
+
- lib/opener/pos_tagger/version.rb
|
170
|
+
- lib/opener/pos_tagger/views/index.erb
|
171
|
+
- lib/opener/pos_tagger/views/result.erb
|
172
|
+
- opener-pos-tagger.gemspec
|
173
|
+
homepage: http://opener-project.github.com/
|
174
|
+
licenses: []
|
175
|
+
metadata: {}
|
176
|
+
post_install_message:
|
177
|
+
rdoc_options: []
|
178
|
+
require_paths:
|
179
|
+
- lib
|
180
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
181
|
+
requirements:
|
182
|
+
- - ">="
|
183
|
+
- !ruby/object:Gem::Version
|
184
|
+
version: 1.9.2
|
185
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
186
|
+
requirements:
|
187
|
+
- - ">="
|
188
|
+
- !ruby/object:Gem::Version
|
189
|
+
version: '0'
|
190
|
+
requirements: []
|
191
|
+
rubyforge_project:
|
192
|
+
rubygems_version: 2.2.2
|
193
|
+
signing_key:
|
194
|
+
specification_version: 4
|
195
|
+
summary: Gem that wraps up the different existing pos-taggers
|
196
|
+
test_files: []
|
197
|
+
has_rdoc: yard
|