opener-tokenizer 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +165 -0
- data/bin/tokenizer +7 -0
- data/bin/tokenizer-daemon +9 -0
- data/bin/tokenizer-server +10 -0
- data/config.ru +4 -0
- data/exec/tokenizer.rb +8 -0
- data/lib/opener/tokenizer/cli.rb +117 -0
- data/lib/opener/tokenizer/public/markdown.css +283 -0
- data/lib/opener/tokenizer/server.rb +16 -0
- data/lib/opener/tokenizer/version.rb +5 -0
- data/lib/opener/tokenizer/views/index.erb +162 -0
- data/lib/opener/tokenizer/views/result.erb +15 -0
- data/lib/opener/tokenizer.rb +109 -0
- data/opener-tokenizer.gemspec +36 -0
- metadata +200 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: fcf7f5ea8023ba15ef71ca6fb82839f7404cf1bf
|
4
|
+
data.tar.gz: 021f76b37d483bea54bc3f41ccb1c85a35432003
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d359e3fdfc7f5792958df137725fcdb02192f52e8a321654628bb5ed4a954b09ef461f5f6a69ed2215e813cad663b945c61cf8a262a80775d882dba98cc185a3
|
7
|
+
data.tar.gz: 77d3ab3fd86a8dd12d8fe4210f36d40e1fb1185cfaab0e5fe40eba34b44b84e62ffe80e042e14b6c90aa67f2b58feaf4fcffd6e8aac85339d6dae9ba7564d22a
|
data/README.md
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
Introduction
|
2
|
+
------------
|
3
|
+
|
4
|
+
The tokenizer tokenizes a text into sentences and words.
|
5
|
+
|
6
|
+
### Confused by some terminology?
|
7
|
+
|
8
|
+
This software is part of a larger collection of natural language processing
|
9
|
+
tools known as "the OpeNER project". You can find more information about the
|
10
|
+
project at [the OpeNER portal](http://opener-project.github.io). There you can
|
11
|
+
also find references to terms like KAF (an XML standard to represent linguistic
|
12
|
+
annotations in texts), components, cores, scenarios and pipelines.
|
13
|
+
|
14
|
+
Quick Use Example
|
15
|
+
-----------------
|
16
|
+
|
17
|
+
Installing the tokenizer can be done by executing:
|
18
|
+
|
19
|
+
gem install tokenizer
|
20
|
+
|
21
|
+
Please bear in mind that all components in OpeNER take KAF as an input and
|
22
|
+
output KAF by default.
|
23
|
+
|
24
|
+
|
25
|
+
### Command line interface
|
26
|
+
|
27
|
+
You should now be able to call the tokenizer as a regular shell
|
28
|
+
command: by its name. Once installed the gem normally sits in your path so you can call it directly from anywhere.
|
29
|
+
|
30
|
+
Tokenizing some text:
|
31
|
+
|
32
|
+
echo "This is English text" | tokenizer -l en --no-kaf
|
33
|
+
|
34
|
+
Will result in
|
35
|
+
|
36
|
+
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
37
|
+
<KAF version="v1.opener" xml:lang="en">
|
38
|
+
<kafHeader>
|
39
|
+
<linguisticProcessors layer="text">
|
40
|
+
<lp name="opener-sentence-splitter-en" timestamp="2013-05-31T11:39:31Z" version="0.0.1"/>
|
41
|
+
<lp name="opener-tokenizer-en" timestamp="2013-05-31T11:39:32Z" version="1.0.1"/>
|
42
|
+
</linguisticProcessors>
|
43
|
+
</kafHeader>
|
44
|
+
<text>
|
45
|
+
<wf length="4" offset="0" para="1" sent="1" wid="w1">This</wf>
|
46
|
+
<wf length="2" offset="5" para="1" sent="1" wid="w2">is</wf>
|
47
|
+
<wf length="7" offset="8" para="1" sent="1" wid="w3">English</wf>
|
48
|
+
<wf length="4" offset="16" para="1" sent="1" wid="w4">text</wf>
|
49
|
+
</text>
|
50
|
+
</KAF>
|
51
|
+
|
52
|
+
The available languages for tokenization are: English (en), German (de), Dutch (nl), French (fr), Spanish (es), Italian (it)
|
53
|
+
|
54
|
+
#### KAF input format
|
55
|
+
|
56
|
+
The tokenizer is capable of taking KAF as input, and actually does so by
|
57
|
+
default. You can do so like this:
|
58
|
+
|
59
|
+
echo "<?xml version='1.0' encoding='UTF-8' standalone='no'?><KAF version='v1.opener' xml:lang='en'><raw>This is what I call, a test!</raw></KAF>" | tokenizer
|
60
|
+
|
61
|
+
Will result in
|
62
|
+
|
63
|
+
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
64
|
+
<KAF version="v1.opener" xml:lang="en">
|
65
|
+
<kafHeader>
|
66
|
+
<linguisticProcessors layer="text">
|
67
|
+
<lp name="opener-sentence-splitter-en" timestamp="2013-05-31T11:39:31Z" version="0.0.1"/>
|
68
|
+
<lp name="opener-tokenizer-en" timestamp="2013-05-31T11:39:32Z" version="1.0.1"/>
|
69
|
+
</linguisticProcessors>
|
70
|
+
</kafHeader>
|
71
|
+
<text>
|
72
|
+
<wf length="4" offset="0" para="1" sent="1" wid="w1">this</wf>
|
73
|
+
<wf length="2" offset="5" para="1" sent="1" wid="w2">is</wf>
|
74
|
+
<wf length="2" offset="8" para="1" sent="1" wid="w3">an</wf>
|
75
|
+
<wf length="7" offset="11" para="1" sent="1" wid="w4">english</wf>
|
76
|
+
<wf length="4" offset="19" para="1" sent="1" wid="w5">text</wf>
|
77
|
+
</text>
|
78
|
+
</KAF>
|
79
|
+
|
80
|
+
If the argument -k (--kaf) is passed, then the argument -l (--language) is ignored.
|
81
|
+
|
82
|
+
### Webservices
|
83
|
+
|
84
|
+
You can launch a language identification webservice by executing:
|
85
|
+
|
86
|
+
tokenizer-server
|
87
|
+
|
88
|
+
This will launch a mini webserver with the webservice. It defaults to port 9292,
|
89
|
+
so you can access it at <http://localhost:9292>.
|
90
|
+
|
91
|
+
To launch it on a different port provide the `-p [port-number]` option like
|
92
|
+
this:
|
93
|
+
|
94
|
+
tokenizer-server -p 1234
|
95
|
+
|
96
|
+
It then launches at <http://localhost:1234>
|
97
|
+
|
98
|
+
Documentation on the Webservice is provided by surfing to the urls provided
|
99
|
+
above. For more information on how to launch a webservice run the command with
|
100
|
+
the ```-h``` option.
|
101
|
+
|
102
|
+
|
103
|
+
### Daemon
|
104
|
+
|
105
|
+
Last but not least the tokenizer comes shipped with a daemon that
|
106
|
+
can read jobs (and write) jobs to and from Amazon SQS queues. For more
|
107
|
+
information type:
|
108
|
+
|
109
|
+
tokenizer-daemon -h
|
110
|
+
|
111
|
+
Description of dependencies
|
112
|
+
---------------------------
|
113
|
+
|
114
|
+
This component runs best if you run it in an environment suited for OpeNER
|
115
|
+
components. You can find an installation guide and helper tools in the [OpeNER
|
116
|
+
installer](https://github.com/opener-project/opener-installer) and [an
|
117
|
+
installation guide on the Opener
|
118
|
+
Website](http://opener-project.github.io/getting-started/how-to/local-installation.html)
|
119
|
+
|
120
|
+
At least you need the following system setup:
|
121
|
+
|
122
|
+
### Dependencies for normal use:
|
123
|
+
|
124
|
+
* Perl 5
|
125
|
+
* MRI 1.9.3
|
126
|
+
|
127
|
+
### Dependencies if you want to modify the component:
|
128
|
+
|
129
|
+
* Maven (for building the Gem)
|
130
|
+
|
131
|
+
|
132
|
+
Language Extension
|
133
|
+
------------------
|
134
|
+
|
135
|
+
TODO
|
136
|
+
|
137
|
+
The Core
|
138
|
+
--------
|
139
|
+
|
140
|
+
The component is a fat wrapper around the actual language technology core. You
|
141
|
+
can find the core technologies in the following repositories:
|
142
|
+
|
143
|
+
* [tokenizer-base](http://github.com/opener-project/tokenizer-base)
|
144
|
+
|
145
|
+
Where to go from here
|
146
|
+
---------------------
|
147
|
+
|
148
|
+
* Check [the project website](http://opener-project.github.io)
|
149
|
+
* [Check out the webservice](http://opener.olery.com/tokenizer)
|
150
|
+
|
151
|
+
Report problem/Get help
|
152
|
+
-----------------------
|
153
|
+
|
154
|
+
If you encounter problems, please email support@opener-project.eu or leave an
|
155
|
+
issue in the [issue tracker](https://github.com/opener-project/tokenizer/issues).
|
156
|
+
|
157
|
+
|
158
|
+
Contributing
|
159
|
+
------------
|
160
|
+
|
161
|
+
1. Fork it ( http://github.com/opener-project/tokenizer/fork )
|
162
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
163
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
164
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
165
|
+
5. Create new Pull Request
|
data/bin/tokenizer
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rack'
|
4
|
+
|
5
|
+
# Without calling `Rack::Server#options` manually the CLI arguments will never
|
6
|
+
# be passed, thus the application can't be specified as a constructor argument.
|
7
|
+
server = Rack::Server.new
|
8
|
+
server.options[:config] = File.expand_path('../../config.ru', __FILE__)
|
9
|
+
|
10
|
+
server.start
|
data/config.ru
ADDED
data/exec/tokenizer.rb
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
module Opener
|
2
|
+
class Tokenizer
|
3
|
+
##
|
4
|
+
# CLI wrapper around {Opener::Tokenizer} using OptionParser.
|
5
|
+
#
|
6
|
+
# @!attribute [r] options
|
7
|
+
# @return [Hash]
|
8
|
+
# @!attribute [r] option_parser
|
9
|
+
# @return [OptionParser]
|
10
|
+
#
|
11
|
+
class CLI
|
12
|
+
attr_reader :options, :option_parser
|
13
|
+
|
14
|
+
##
|
15
|
+
# @param [Hash] options
|
16
|
+
#
|
17
|
+
def initialize(options = {})
|
18
|
+
@options = DEFAULT_OPTIONS.merge(options)
|
19
|
+
|
20
|
+
@option_parser = OptionParser.new do |opts|
|
21
|
+
opts.program_name = 'tokenizer'
|
22
|
+
opts.summary_indent = ' '
|
23
|
+
|
24
|
+
opts.on('-h', '--help', 'Shows this help message') do
|
25
|
+
show_help
|
26
|
+
end
|
27
|
+
|
28
|
+
opts.on('-v', '--version', 'Shows the current version') do
|
29
|
+
show_version
|
30
|
+
end
|
31
|
+
|
32
|
+
opts.on(
|
33
|
+
'-l',
|
34
|
+
'--language [VALUE]',
|
35
|
+
'Uses this specific language'
|
36
|
+
) do |value|
|
37
|
+
@options[:language] = value
|
38
|
+
@options[:kaf] = false
|
39
|
+
end
|
40
|
+
|
41
|
+
opts.on('-k', '--kaf', 'Treats the input as a KAF document') do
|
42
|
+
@options[:kaf] = true
|
43
|
+
end
|
44
|
+
|
45
|
+
opts.on('-p', '--plain', 'Treats the input as plain text') do
|
46
|
+
@options[:kaf] = false
|
47
|
+
end
|
48
|
+
|
49
|
+
opts.separator <<-EOF
|
50
|
+
|
51
|
+
Examples:
|
52
|
+
|
53
|
+
cat example.txt | #{opts.program_name} -l en # Manually specify the language
|
54
|
+
cat example.kaf | #{opts.program_name} # Uses the xml:lang attribute
|
55
|
+
|
56
|
+
Languages:
|
57
|
+
|
58
|
+
* Dutch (nl)
|
59
|
+
* English (en)
|
60
|
+
* French (fr)
|
61
|
+
* German (de)
|
62
|
+
* Italian (it)
|
63
|
+
* Spanish (es)
|
64
|
+
|
65
|
+
KAF Input:
|
66
|
+
|
67
|
+
If you give a KAF file as an input (-k or --kaf) the language is taken from
|
68
|
+
the xml:lang attribute inside the file. Else it expects that you give the
|
69
|
+
language as an argument (-l or --language)
|
70
|
+
|
71
|
+
Sample KAF syntax:
|
72
|
+
|
73
|
+
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
74
|
+
<KAF version="v1.opener" xml:lang="en">
|
75
|
+
<raw>This is some text.</raw>
|
76
|
+
</KAF>
|
77
|
+
EOF
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
##
|
82
|
+
# @param [String] input
|
83
|
+
#
|
84
|
+
def run(input)
|
85
|
+
option_parser.parse!(options[:args])
|
86
|
+
|
87
|
+
tokenizer = Tokenizer.new(options)
|
88
|
+
|
89
|
+
stdout, stderr, process = tokenizer.run(input)
|
90
|
+
|
91
|
+
if process.success?
|
92
|
+
puts stdout
|
93
|
+
|
94
|
+
STDERR.puts(stderr) unless stderr.empty?
|
95
|
+
else
|
96
|
+
abort stderr
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
private
|
101
|
+
|
102
|
+
##
|
103
|
+
# Shows the help message and exits the program.
|
104
|
+
#
|
105
|
+
def show_help
|
106
|
+
abort option_parser.to_s
|
107
|
+
end
|
108
|
+
|
109
|
+
##
|
110
|
+
# Shows the version and exits the program.
|
111
|
+
#
|
112
|
+
def show_version
|
113
|
+
abort "#{option_parser.program_name} v#{VERSION} on #{RUBY_DESCRIPTION}"
|
114
|
+
end
|
115
|
+
end # CLI
|
116
|
+
end # Tokenizer
|
117
|
+
end # Opener
|
@@ -0,0 +1,283 @@
|
|
1
|
+
input[type="text"], textarea
|
2
|
+
{
|
3
|
+
width: 500px;
|
4
|
+
}
|
5
|
+
|
6
|
+
body {
|
7
|
+
font-family: Helvetica, arial, sans-serif;
|
8
|
+
font-size: 14px;
|
9
|
+
line-height: 1.6;
|
10
|
+
padding-top: 10px;
|
11
|
+
padding-bottom: 10px;
|
12
|
+
background-color: white;
|
13
|
+
padding: 30px; }
|
14
|
+
|
15
|
+
body > *:first-child {
|
16
|
+
margin-top: 0 !important; }
|
17
|
+
body > *:last-child {
|
18
|
+
margin-bottom: 0 !important; }
|
19
|
+
|
20
|
+
a {
|
21
|
+
color: #4183C4; }
|
22
|
+
a.absent {
|
23
|
+
color: #cc0000; }
|
24
|
+
a.anchor {
|
25
|
+
display: block;
|
26
|
+
padding-left: 30px;
|
27
|
+
margin-left: -30px;
|
28
|
+
cursor: pointer;
|
29
|
+
position: absolute;
|
30
|
+
top: 0;
|
31
|
+
left: 0;
|
32
|
+
bottom: 0; }
|
33
|
+
|
34
|
+
h1, h2, h3, h4, h5, h6 {
|
35
|
+
margin: 20px 0 10px;
|
36
|
+
padding: 0;
|
37
|
+
font-weight: bold;
|
38
|
+
-webkit-font-smoothing: antialiased;
|
39
|
+
cursor: text;
|
40
|
+
position: relative; }
|
41
|
+
|
42
|
+
h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor, h5:hover a.anchor, h6:hover a.anchor {
|
43
|
+
background: url("../../images/modules/styleguide/para.png") no-repeat 10px center;
|
44
|
+
text-decoration: none; }
|
45
|
+
|
46
|
+
h1 tt, h1 code {
|
47
|
+
font-size: inherit; }
|
48
|
+
|
49
|
+
h2 tt, h2 code {
|
50
|
+
font-size: inherit; }
|
51
|
+
|
52
|
+
h3 tt, h3 code {
|
53
|
+
font-size: inherit; }
|
54
|
+
|
55
|
+
h4 tt, h4 code {
|
56
|
+
font-size: inherit; }
|
57
|
+
|
58
|
+
h5 tt, h5 code {
|
59
|
+
font-size: inherit; }
|
60
|
+
|
61
|
+
h6 tt, h6 code {
|
62
|
+
font-size: inherit; }
|
63
|
+
|
64
|
+
h1 {
|
65
|
+
font-size: 28px;
|
66
|
+
color: black; }
|
67
|
+
|
68
|
+
h2 {
|
69
|
+
font-size: 24px;
|
70
|
+
border-bottom: 1px solid #cccccc;
|
71
|
+
color: black; }
|
72
|
+
|
73
|
+
h3 {
|
74
|
+
font-size: 18px; }
|
75
|
+
|
76
|
+
h4 {
|
77
|
+
font-size: 16px; }
|
78
|
+
|
79
|
+
h5 {
|
80
|
+
font-size: 14px; }
|
81
|
+
|
82
|
+
h6 {
|
83
|
+
color: #777777;
|
84
|
+
font-size: 14px; }
|
85
|
+
|
86
|
+
p, blockquote, ul, ol, dl, li, table, pre {
|
87
|
+
margin: 15px 0; }
|
88
|
+
|
89
|
+
hr {
|
90
|
+
background: transparent url("../../images/modules/pulls/dirty-shade.png") repeat-x 0 0;
|
91
|
+
border: 0 none;
|
92
|
+
color: #cccccc;
|
93
|
+
height: 4px;
|
94
|
+
padding: 0; }
|
95
|
+
|
96
|
+
body > h2:first-child {
|
97
|
+
margin-top: 0;
|
98
|
+
padding-top: 0; }
|
99
|
+
body > h1:first-child {
|
100
|
+
margin-top: 0;
|
101
|
+
padding-top: 0; }
|
102
|
+
body > h1:first-child + h2 {
|
103
|
+
margin-top: 0;
|
104
|
+
padding-top: 0; }
|
105
|
+
body > h3:first-child, body > h4:first-child, body > h5:first-child, body > h6:first-child {
|
106
|
+
margin-top: 0;
|
107
|
+
padding-top: 0; }
|
108
|
+
|
109
|
+
a:first-child h1, a:first-child h2, a:first-child h3, a:first-child h4, a:first-child h5, a:first-child h6 {
|
110
|
+
margin-top: 0;
|
111
|
+
padding-top: 0; }
|
112
|
+
|
113
|
+
h1 p, h2 p, h3 p, h4 p, h5 p, h6 p {
|
114
|
+
margin-top: 0; }
|
115
|
+
|
116
|
+
li p.first {
|
117
|
+
display: inline-block; }
|
118
|
+
|
119
|
+
ul, ol {
|
120
|
+
padding-left: 30px; }
|
121
|
+
|
122
|
+
ul :first-child, ol :first-child {
|
123
|
+
margin-top: 0; }
|
124
|
+
|
125
|
+
ul :last-child, ol :last-child {
|
126
|
+
margin-bottom: 0; }
|
127
|
+
|
128
|
+
dl {
|
129
|
+
padding: 0; }
|
130
|
+
dl dt {
|
131
|
+
font-size: 14px;
|
132
|
+
font-weight: bold;
|
133
|
+
font-style: italic;
|
134
|
+
padding: 0;
|
135
|
+
margin: 15px 0 5px; }
|
136
|
+
dl dt:first-child {
|
137
|
+
padding: 0; }
|
138
|
+
dl dt > :first-child {
|
139
|
+
margin-top: 0; }
|
140
|
+
dl dt > :last-child {
|
141
|
+
margin-bottom: 0; }
|
142
|
+
dl dd {
|
143
|
+
margin: 0 0 15px;
|
144
|
+
padding: 0 15px; }
|
145
|
+
dl dd > :first-child {
|
146
|
+
margin-top: 0; }
|
147
|
+
dl dd > :last-child {
|
148
|
+
margin-bottom: 0; }
|
149
|
+
|
150
|
+
blockquote {
|
151
|
+
border-left: 4px solid #dddddd;
|
152
|
+
padding: 0 15px;
|
153
|
+
color: #777777; }
|
154
|
+
blockquote > :first-child {
|
155
|
+
margin-top: 0; }
|
156
|
+
blockquote > :last-child {
|
157
|
+
margin-bottom: 0; }
|
158
|
+
|
159
|
+
table {
|
160
|
+
padding: 0; }
|
161
|
+
table tr {
|
162
|
+
border-top: 1px solid #cccccc;
|
163
|
+
background-color: white;
|
164
|
+
margin: 0;
|
165
|
+
padding: 0; }
|
166
|
+
table tr:nth-child(2n) {
|
167
|
+
background-color: #f8f8f8; }
|
168
|
+
table tr th {
|
169
|
+
font-weight: bold;
|
170
|
+
border: 1px solid #cccccc;
|
171
|
+
text-align: left;
|
172
|
+
margin: 0;
|
173
|
+
padding: 6px 13px; }
|
174
|
+
table tr td {
|
175
|
+
border: 1px solid #cccccc;
|
176
|
+
text-align: left;
|
177
|
+
margin: 0;
|
178
|
+
padding: 6px 13px; }
|
179
|
+
table tr th :first-child, table tr td :first-child {
|
180
|
+
margin-top: 0; }
|
181
|
+
table tr th :last-child, table tr td :last-child {
|
182
|
+
margin-bottom: 0; }
|
183
|
+
|
184
|
+
img {
|
185
|
+
max-width: 100%; }
|
186
|
+
|
187
|
+
span.frame {
|
188
|
+
display: block;
|
189
|
+
overflow: hidden; }
|
190
|
+
span.frame > span {
|
191
|
+
border: 1px solid #dddddd;
|
192
|
+
display: block;
|
193
|
+
float: left;
|
194
|
+
overflow: hidden;
|
195
|
+
margin: 13px 0 0;
|
196
|
+
padding: 7px;
|
197
|
+
width: auto; }
|
198
|
+
span.frame span img {
|
199
|
+
display: block;
|
200
|
+
float: left; }
|
201
|
+
span.frame span span {
|
202
|
+
clear: both;
|
203
|
+
color: #333333;
|
204
|
+
display: block;
|
205
|
+
padding: 5px 0 0; }
|
206
|
+
span.align-center {
|
207
|
+
display: block;
|
208
|
+
overflow: hidden;
|
209
|
+
clear: both; }
|
210
|
+
span.align-center > span {
|
211
|
+
display: block;
|
212
|
+
overflow: hidden;
|
213
|
+
margin: 13px auto 0;
|
214
|
+
text-align: center; }
|
215
|
+
span.align-center span img {
|
216
|
+
margin: 0 auto;
|
217
|
+
text-align: center; }
|
218
|
+
span.align-right {
|
219
|
+
display: block;
|
220
|
+
overflow: hidden;
|
221
|
+
clear: both; }
|
222
|
+
span.align-right > span {
|
223
|
+
display: block;
|
224
|
+
overflow: hidden;
|
225
|
+
margin: 13px 0 0;
|
226
|
+
text-align: right; }
|
227
|
+
span.align-right span img {
|
228
|
+
margin: 0;
|
229
|
+
text-align: right; }
|
230
|
+
span.float-left {
|
231
|
+
display: block;
|
232
|
+
margin-right: 13px;
|
233
|
+
overflow: hidden;
|
234
|
+
float: left; }
|
235
|
+
span.float-left span {
|
236
|
+
margin: 13px 0 0; }
|
237
|
+
span.float-right {
|
238
|
+
display: block;
|
239
|
+
margin-left: 13px;
|
240
|
+
overflow: hidden;
|
241
|
+
float: right; }
|
242
|
+
span.float-right > span {
|
243
|
+
display: block;
|
244
|
+
overflow: hidden;
|
245
|
+
margin: 13px auto 0;
|
246
|
+
text-align: right; }
|
247
|
+
|
248
|
+
code, tt {
|
249
|
+
margin: 0 2px;
|
250
|
+
padding: 0 5px;
|
251
|
+
white-space: nowrap;
|
252
|
+
border: 1px solid #eaeaea;
|
253
|
+
background-color: #f8f8f8;
|
254
|
+
border-radius: 3px; }
|
255
|
+
|
256
|
+
pre code {
|
257
|
+
margin: 0;
|
258
|
+
padding: 0;
|
259
|
+
white-space: pre;
|
260
|
+
border: none;
|
261
|
+
background: transparent; }
|
262
|
+
|
263
|
+
.highlight pre {
|
264
|
+
background-color: #f8f8f8;
|
265
|
+
border: 1px solid #cccccc;
|
266
|
+
font-size: 13px;
|
267
|
+
line-height: 19px;
|
268
|
+
overflow: auto;
|
269
|
+
padding: 6px 10px;
|
270
|
+
border-radius: 3px; }
|
271
|
+
|
272
|
+
pre {
|
273
|
+
background-color: #f8f8f8;
|
274
|
+
border: 1px solid #cccccc;
|
275
|
+
font-size: 13px;
|
276
|
+
line-height: 19px;
|
277
|
+
overflow: auto;
|
278
|
+
padding: 6px 10px;
|
279
|
+
border-radius: 3px; }
|
280
|
+
pre code, pre tt {
|
281
|
+
background-color: transparent;
|
282
|
+
border: none; }
|
283
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'sinatra/base'
|
2
|
+
require 'httpclient'
|
3
|
+
require 'opener/webservice'
|
4
|
+
|
5
|
+
module Opener
|
6
|
+
class Tokenizer
|
7
|
+
##
|
8
|
+
# Text tokenizer server powered by Sinatra.
|
9
|
+
#
|
10
|
+
class Server < Webservice
|
11
|
+
set :views, File.expand_path('../views', __FILE__)
|
12
|
+
text_processor Tokenizer
|
13
|
+
accepted_params :input, :kaf, :language
|
14
|
+
end # Server
|
15
|
+
end # Tokenizer
|
16
|
+
end # Opener
|
@@ -0,0 +1,162 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link type="text/css" rel="stylesheet" charset="UTF-8" href="markdown.css"/>
|
5
|
+
<title>Tokenizer Webservice</title>
|
6
|
+
</head>
|
7
|
+
<body>
|
8
|
+
<h1>Tokenizer Web Service</h1>
|
9
|
+
|
10
|
+
<h2>Example Usage</h2>
|
11
|
+
|
12
|
+
<p>
|
13
|
+
<pre>tokenizer-server start</pre>
|
14
|
+
<pre>curl -d "input=this is an english text&language=en" http://localhost:9393 -XPOST</pre>
|
15
|
+
|
16
|
+
outputs:
|
17
|
+
|
18
|
+
<pre><?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
19
|
+
<KAF version="v1.opener" xml:lang="en">
|
20
|
+
<kafHeader>
|
21
|
+
<linguisticProcessors layer="text">
|
22
|
+
<lp name="opener-sentence-splitter-en" timestamp="2013-06-11T13:29:21Z" version="0.0.1"/>
|
23
|
+
<lp name="opener-tokenizer-en" timestamp="2013-06-11T13:29:22Z" version="1.0.1"/>
|
24
|
+
</linguisticProcessors>
|
25
|
+
</kafHeader>
|
26
|
+
<text>
|
27
|
+
<wf length="4" offset="0" para="1" sent="1" wid="w1">this</wf>
|
28
|
+
<wf length="2" offset="5" para="1" sent="1" wid="w2">is</wf>
|
29
|
+
<wf length="2" offset="8" para="1" sent="1" wid="w3">an</wf>
|
30
|
+
<wf length="7" offset="11" para="1" sent="1" wid="w4">english</wf>
|
31
|
+
<wf length="4" offset="19" para="1" sent="1" wid="w5">text</wf>
|
32
|
+
</text>
|
33
|
+
</KAF></pre>
|
34
|
+
|
35
|
+
|
36
|
+
|
37
|
+
<pre>curl -d 'text=<?xml version="1.0" encoding="UTF-8" standalone="yes"?><KAF xml:lang="en"><raw>this is an english text</raw></KAF>&kaf=true' http://localhost:9292 -XPOST</pre>
|
38
|
+
|
39
|
+
outputs:
|
40
|
+
|
41
|
+
<pre><?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
42
|
+
<KAF version="v1.opener" xml:lang="en">
|
43
|
+
<kafHeader>
|
44
|
+
<linguisticProcessors layer="text">
|
45
|
+
<lp name="opener-sentence-splitter-en" timestamp="2013-06-11T13:26:15Z" version="0.0.1"/>
|
46
|
+
<lp name="opener-tokenizer-en" timestamp="2013-06-11T13:26:16Z" version="1.0.1"/>
|
47
|
+
</linguisticProcessors>
|
48
|
+
</kafHeader>
|
49
|
+
<text>
|
50
|
+
<wf length="4" offset="0" para="1" sent="1" wid="w1">this</wf>
|
51
|
+
<wf length="2" offset="5" para="1" sent="1" wid="w2">is</wf>
|
52
|
+
<wf length="2" offset="8" para="1" sent="1" wid="w3">an</wf>
|
53
|
+
<wf length="7" offset="11" para="1" sent="1" wid="w4">english</wf>
|
54
|
+
<wf length="4" offset="19" para="1" sent="1" wid="w5">text</wf>
|
55
|
+
</text>
|
56
|
+
</KAF></pre>
|
57
|
+
</p>
|
58
|
+
|
59
|
+
<h2>Try the webservice</h2>
|
60
|
+
|
61
|
+
<p>* required</p>
|
62
|
+
<p>** When entering a value no response will be displayed in the browser.</p>
|
63
|
+
|
64
|
+
<form action="<%=url("/")%>" method="POST">
|
65
|
+
<div>
|
66
|
+
<label for="input"/>Type your text here*</label>
|
67
|
+
<br/>
|
68
|
+
|
69
|
+
<textarea name="input" id="input" rows="10" cols="50"/></textarea>
|
70
|
+
</div>
|
71
|
+
|
72
|
+
<% 10.times do |t| %>
|
73
|
+
<div>
|
74
|
+
<label for="callbacks">Callback URL <%=t+1%>(**)</label>
|
75
|
+
<br />
|
76
|
+
|
77
|
+
<input id="callbacks" type="text" name="callbacks[]" />
|
78
|
+
</div>
|
79
|
+
<% end %>
|
80
|
+
|
81
|
+
|
82
|
+
<div>
|
83
|
+
<label for="error_callback">Error Callback</label>
|
84
|
+
<br />
|
85
|
+
|
86
|
+
<input id="error_callback" type="text" name="error_callback" />
|
87
|
+
</div>
|
88
|
+
<div>
|
89
|
+
<label for="kaf">
|
90
|
+
<input type="checkbox" name="kaf" value="false" id="kaf"/>
|
91
|
+
The input is in raw text (as opposed to kaf) format.
|
92
|
+
</label>
|
93
|
+
|
94
|
+
<br/>
|
95
|
+
|
96
|
+
<label for="language">
|
97
|
+
Choose the language of the text from the list.
|
98
|
+
<select name="language" id="language">
|
99
|
+
<option value="en">English</option>
|
100
|
+
<option value="de">German</option>
|
101
|
+
<option value="nl">Dutch</option>
|
102
|
+
<option value="fr">French</option>
|
103
|
+
<option value="es">Spanish</option>
|
104
|
+
<option value="it">Italian</option>
|
105
|
+
</select>
|
106
|
+
</label>
|
107
|
+
</div>
|
108
|
+
|
109
|
+
<input type="submit" value="Submit" />
|
110
|
+
</form>
|
111
|
+
|
112
|
+
<h2>Actions</h2>
|
113
|
+
|
114
|
+
<p>
|
115
|
+
<dl>
|
116
|
+
<dt>POST /</dt>
|
117
|
+
<dd>Tokenize the input text. See arguments listing for more options.</dd>
|
118
|
+
<dt>GET /</dt>
|
119
|
+
<dd>Show this page</dd>
|
120
|
+
</dl>
|
121
|
+
</p>
|
122
|
+
|
123
|
+
<h2>Arguments</h2>
|
124
|
+
|
125
|
+
<p> The webservice takes the following arguments: </p>
|
126
|
+
<p>* required</p>
|
127
|
+
|
128
|
+
<dl>
|
129
|
+
<dt>text*</dt>
|
130
|
+
<dd>The input text</dd>
|
131
|
+
<dt>kaf [true | false]</dt>
|
132
|
+
<dd>The input is in KAF format.</dd>
|
133
|
+
<dt>language [English | German | Dutch | French | Spanish | Italian]</dt>
|
134
|
+
<dd>The language of the provided text</dt>
|
135
|
+
<dt>callbacks</dt>
|
136
|
+
<dd>
|
137
|
+
You can provide a list of callback urls. If you provide callback urls
|
138
|
+
the tokenizer will run as a background job and a callback
|
139
|
+
with the results will be performed (POST) to the first url in the callback
|
140
|
+
list. The other urls in callback list will be provided in the "callbacks"
|
141
|
+
argument.<br/><br/>
|
142
|
+
Using callback you can chain together several OpeNER webservices in
|
143
|
+
one call. The first, will call the second, which will call the third, etc.
|
144
|
+
See for more information the <a href="http://opener-project.github.io">
|
145
|
+
webservice documentation online</a>.
|
146
|
+
</dd>
|
147
|
+
<dt>error_callback</dt>
|
148
|
+
<dd>URL to notify if errors occur in the background process. The error
|
149
|
+
callback will do a POST with the error message in the 'error' field.</dd>
|
150
|
+
</dt>
|
151
|
+
|
152
|
+
|
153
|
+
|
154
|
+
</dl>
|
155
|
+
|
156
|
+
|
157
|
+
<p>
|
158
|
+
|
159
|
+
</p>
|
160
|
+
|
161
|
+
</body>
|
162
|
+
</html>
|
@@ -0,0 +1,15 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link type="text/css" rel="stylesheet" charset="UTF-8" href="markdown.css"/>
|
5
|
+
<title>Language Detector Webservice</title>
|
6
|
+
</head>
|
7
|
+
<body>
|
8
|
+
<h1>Output URL</h1>
|
9
|
+
<p>
|
10
|
+
When ready, you can view the result
|
11
|
+
<a href=<%= output_url %>>here</a>
|
12
|
+
</p>
|
13
|
+
|
14
|
+
</body>
|
15
|
+
</html>
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'opener/tokenizers/base'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'open3'
|
4
|
+
require 'optparse'
|
5
|
+
|
6
|
+
require_relative 'tokenizer/version'
|
7
|
+
require_relative 'tokenizer/cli'
|
8
|
+
|
9
|
+
module Opener
|
10
|
+
##
|
11
|
+
# Primary tokenizer class that delegates the work to the various language
|
12
|
+
# specific tokenizers.
|
13
|
+
#
|
14
|
+
# @!attribute [r] options
|
15
|
+
# @return [Hash]
|
16
|
+
#
|
17
|
+
class Tokenizer
|
18
|
+
attr_reader :options
|
19
|
+
|
20
|
+
##
|
21
|
+
# The default language to use when no custom one is specified.
|
22
|
+
#
|
23
|
+
# @return [String]
|
24
|
+
#
|
25
|
+
DEFAULT_LANGUAGE = 'en'.freeze
|
26
|
+
|
27
|
+
##
|
28
|
+
# Hash containing the default options to use.
|
29
|
+
#
|
30
|
+
# @return [Hash]
|
31
|
+
#
|
32
|
+
DEFAULT_OPTIONS = {
|
33
|
+
:args => [],
|
34
|
+
:kaf => true,
|
35
|
+
:language => DEFAULT_LANGUAGE
|
36
|
+
}.freeze
|
37
|
+
|
38
|
+
##
|
39
|
+
# @param [Hash] options
|
40
|
+
#
|
41
|
+
# @option options [Array] :args Collection of arbitrary arguments to pass
|
42
|
+
# to the individual tokenizer commands.
|
43
|
+
# @option options [String] :language The language to use for the
|
44
|
+
# tokenization process.
|
45
|
+
# @option options [TrueClass|FalseClass] :kaf When set to `true` the input
|
46
|
+
# is assumed to be KAF.
|
47
|
+
#
|
48
|
+
def initialize(options = {})
|
49
|
+
@options = DEFAULT_OPTIONS.merge(options)
|
50
|
+
end
|
51
|
+
|
52
|
+
##
|
53
|
+
# Processes the input and returns an array containing the output of STDOUT,
|
54
|
+
# STDERR and an object containing process information.
|
55
|
+
#
|
56
|
+
# @param [String] input
|
57
|
+
# @return [Array]
|
58
|
+
#
|
59
|
+
def run(input)
|
60
|
+
|
61
|
+
if options[:kaf]
|
62
|
+
language, input = kaf_elements(input)
|
63
|
+
else
|
64
|
+
language = options[:language]
|
65
|
+
end
|
66
|
+
|
67
|
+
unless valid_language?(language)
|
68
|
+
raise ArgumentError, "The specified language (#{language}) is invalid"
|
69
|
+
end
|
70
|
+
|
71
|
+
kernel = language_constant(language).new(:args => options[:args])
|
72
|
+
|
73
|
+
return Open3.capture3(*kernel.command.split(" "), :stdin_data => input)
|
74
|
+
end
|
75
|
+
|
76
|
+
alias tokenize run
|
77
|
+
|
78
|
+
private
|
79
|
+
|
80
|
+
##
|
81
|
+
# Returns an Array containing the language an input from a KAF document.
|
82
|
+
#
|
83
|
+
# @param [String] input The KAF document.
|
84
|
+
# @return [Array]
|
85
|
+
#
|
86
|
+
def kaf_elements(input)
|
87
|
+
document = Nokogiri::XML(input)
|
88
|
+
language = document.at('KAF').attr('xml:lang')
|
89
|
+
text = document.at('raw').text
|
90
|
+
|
91
|
+
return language, text
|
92
|
+
end
|
93
|
+
|
94
|
+
##
|
95
|
+
# @param [String] language
|
96
|
+
# @return [Class]
|
97
|
+
#
|
98
|
+
def language_constant(language)
|
99
|
+
Opener::Tokenizers.const_get(language.upcase)
|
100
|
+
end
|
101
|
+
|
102
|
+
##
|
103
|
+
# @return [TrueClass|FalseClass]
|
104
|
+
#
|
105
|
+
def valid_language?(language)
|
106
|
+
return Opener::Tokenizers.const_defined?(language.upcase)
|
107
|
+
end
|
108
|
+
end # Tokenizer
|
109
|
+
end # Opener
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require File.expand_path('../lib/opener/tokenizer/version', __FILE__)
|
2
|
+
|
3
|
+
Gem::Specification.new do |gem|
|
4
|
+
gem.name = 'opener-tokenizer'
|
5
|
+
gem.version = Opener::Tokenizer::VERSION
|
6
|
+
gem.authors = ['development@olery.com']
|
7
|
+
gem.summary = 'Gem that wraps up the the tokenizer cores'
|
8
|
+
gem.description = gem.summary
|
9
|
+
gem.homepage = 'http://opener-project.github.com/'
|
10
|
+
gem.has_rdoc = "yard"
|
11
|
+
|
12
|
+
gem.required_ruby_version = '>= 1.9.2'
|
13
|
+
|
14
|
+
gem.files = Dir.glob([
|
15
|
+
'exec/**/*',
|
16
|
+
'lib/**/*',
|
17
|
+
'config.ru',
|
18
|
+
'*.gemspec',
|
19
|
+
'README.md'
|
20
|
+
]).select { |file| File.file?(file) }
|
21
|
+
|
22
|
+
gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
|
23
|
+
|
24
|
+
gem.add_dependency 'opener-tokenizer-base', '>= 0.3.1'
|
25
|
+
gem.add_dependency 'opener-webservice'
|
26
|
+
|
27
|
+
gem.add_dependency 'nokogiri'
|
28
|
+
gem.add_dependency 'sinatra', '~>1.4.2'
|
29
|
+
gem.add_dependency 'httpclient'
|
30
|
+
gem.add_dependency 'opener-daemons'
|
31
|
+
|
32
|
+
gem.add_development_dependency 'rspec'
|
33
|
+
gem.add_development_dependency 'cucumber'
|
34
|
+
gem.add_development_dependency 'pry'
|
35
|
+
gem.add_development_dependency 'rake'
|
36
|
+
end
|
metadata
ADDED
@@ -0,0 +1,200 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: opener-tokenizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- development@olery.com
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-05-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: opener-tokenizer-base
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.3.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.3.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: opener-webservice
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: nokogiri
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: sinatra
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 1.4.2
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 1.4.2
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: httpclient
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: opener-daemons
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rspec
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: cucumber
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: pry
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: rake
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
description: Gem that wraps up the the tokenizer cores
|
154
|
+
email:
|
155
|
+
executables:
|
156
|
+
- tokenizer
|
157
|
+
- tokenizer-daemon
|
158
|
+
- tokenizer-server
|
159
|
+
extensions: []
|
160
|
+
extra_rdoc_files: []
|
161
|
+
files:
|
162
|
+
- README.md
|
163
|
+
- bin/tokenizer
|
164
|
+
- bin/tokenizer-daemon
|
165
|
+
- bin/tokenizer-server
|
166
|
+
- config.ru
|
167
|
+
- exec/tokenizer.rb
|
168
|
+
- lib/opener/tokenizer.rb
|
169
|
+
- lib/opener/tokenizer/cli.rb
|
170
|
+
- lib/opener/tokenizer/public/markdown.css
|
171
|
+
- lib/opener/tokenizer/server.rb
|
172
|
+
- lib/opener/tokenizer/version.rb
|
173
|
+
- lib/opener/tokenizer/views/index.erb
|
174
|
+
- lib/opener/tokenizer/views/result.erb
|
175
|
+
- opener-tokenizer.gemspec
|
176
|
+
homepage: http://opener-project.github.com/
|
177
|
+
licenses: []
|
178
|
+
metadata: {}
|
179
|
+
post_install_message:
|
180
|
+
rdoc_options: []
|
181
|
+
require_paths:
|
182
|
+
- lib
|
183
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - ">="
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: 1.9.2
|
188
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
189
|
+
requirements:
|
190
|
+
- - ">="
|
191
|
+
- !ruby/object:Gem::Version
|
192
|
+
version: '0'
|
193
|
+
requirements: []
|
194
|
+
rubyforge_project:
|
195
|
+
rubygems_version: 2.2.2
|
196
|
+
signing_key:
|
197
|
+
specification_version: 4
|
198
|
+
summary: Gem that wraps up the the tokenizer cores
|
199
|
+
test_files: []
|
200
|
+
has_rdoc: yard
|