opener-property-tagger 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +62 -0
- data/bin/property-tagger +7 -0
- data/bin/property-tagger-server +8 -0
- data/config.ru +5 -0
- data/core/extract_aspects.py +18 -0
- data/core/hotel_property_tagger_nl_en.py +133 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
- data/ext/hack/Rakefile +13 -0
- data/ext/hack/support.rb +38 -0
- data/lib/opener/property_tagger.rb +86 -0
- data/lib/opener/property_tagger/cli.rb +84 -0
- data/lib/opener/property_tagger/public/markdown.css +284 -0
- data/lib/opener/property_tagger/server.rb +16 -0
- data/lib/opener/property_tagger/version.rb +5 -0
- data/lib/opener/property_tagger/views/index.erb +97 -0
- data/lib/opener/property_tagger/views/result.erb +15 -0
- data/opener-property-tagger.gemspec +37 -0
- data/pre_build_requirements.txt +1 -0
- metadata +183 -0
data/ext/hack/support.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'opener/build-tools'
|
2
|
+
|
3
|
+
include Opener::BuildTools::Requirements
|
4
|
+
include Opener::BuildTools::Python
|
5
|
+
include Opener::BuildTools::Files
|
6
|
+
|
7
|
+
# Absolute path of the directory that Python packages get installed into
# (core/site-packages, three levels above this file).
PYTHON_SITE_PACKAGES = File.expand_path('../../../core/site-packages', __FILE__)

# Absolute path of the directory used for temporary files.
TMP_DIRECTORY = File.expand_path('../../../tmp', __FILE__)

# Absolute path of the pip requirements file whose packages are installed
# before the Gem is packaged.
PRE_BUILD_REQUIREMENTS = File.expand_path('../../../pre_build_requirements.txt', __FILE__)

# Absolute path of the pip requirements file whose packages are installed
# when the Gem itself is installed.
PRE_INSTALL_REQUIREMENTS = File.expand_path('../../../pre_install_requirements.txt', __FILE__)
|
29
|
+
|
30
|
+
##
# Verifies the requirements to install this Gem.
#
# Checks, in order, that a `python` executable is available and at least
# version 2.6.0, then that a `pip` executable is available and at least
# version 1.3.1. The checks themselves are delegated to the helpers mixed in
# from Opener::BuildTools.
#
def verify_requirements
  require_executable('python')
  installed_python = python_version
  require_version('python', installed_python, '2.6.0')

  require_executable('pip')
  installed_pip = pip_version
  require_version('pip', installed_pip, '1.3.1')
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'open3'
|
2
|
+
require 'optparse'
|
3
|
+
|
4
|
+
require_relative 'property_tagger/version'
|
5
|
+
require_relative 'property_tagger/cli'
|
6
|
+
|
7
|
+
module Opener
  ##
  # Ruby wrapper around the Python based property tagger.
  #
  # @!attribute [r] options
  #  @return [Hash]
  # @!attribute [r] args
  #  @return [Array]
  #
  class PropertyTagger
    attr_reader :options, :args

    ##
    # @param [Hash] options
    #
    # @option options [Array] :args Collection of arbitrary arguments to pass
    #  to the underlying kernel.
    #
    def initialize(options = {})
      # Work on a copy so that extracting :args does not mutate the Hash that
      # the caller passed in.
      @options = options.dup
      @args    = @options.delete(:args) || []
    end

    ##
    # Returns a String containing the command to use for executing the kernel.
    #
    # @return [String]
    #
    def command
      "#{adjust_python_path} python -E -OO #{kernel} #{args.join(' ')}"
    end

    ##
    # Processes the input and returns an Array containing the output of STDOUT,
    # STDERR and an object containing process information.
    #
    # @param [String] input The text that is written to the kernel's STDIN.
    # @return [Array]
    #
    def run(input)
      capture(input)
    end

    protected

    ##
    # Returns the `env` invocation that prepends the bundled site-packages
    # directory to PYTHONPATH.
    #
    # NOTE(review): the kernel is started with `python -E`, which makes Python
    # ignore PYTHON* environment variables, and the process is spawned without
    # a shell so `$PYTHONPATH` is passed on literally. This assignment may
    # therefore have no effect - confirm before relying on it.
    #
    # @return [String]
    #
    def adjust_python_path
      site_packages = File.join(core_dir, 'site-packages')

      "env PYTHONPATH=#{site_packages}:$PYTHONPATH"
    end

    ##
    # Returns the command as an Array of separate arguments, suitable for
    # spawning the process without a shell.
    #
    # The PYTHONPATH assignment and the kernel path are kept as single
    # elements so that installation paths containing whitespace are not torn
    # apart. The user supplied arguments are still split on whitespace, the
    # same way they were when the whole command String was split.
    #
    # @return [Array<String>]
    #
    def command_arguments
      program, python_path = adjust_python_path.split(' ', 2)

      [program, python_path, 'python', '-E', '-OO', kernel] +
        args.join(' ').split(' ')
    end

    ##
    # capture3 method doesn't work properly with Jruby, so
    # this is a workaround
    #
    # STDOUT and STDERR are drained in separate threads while the input is
    # being written to STDIN.
    #
    # @param [String] input
    # @return [Array] STDOUT, STDERR and the process status.
    #
    def capture(input)
      Open3.popen3(*command_arguments) do |stdin, stdout, stderr, thread|
        out_reader = Thread.new { stdout.read }
        err_reader = Thread.new { stderr.read }

        stdin.write(input)
        stdin.close

        [out_reader.value, err_reader.value, thread.value]
      end
    end

    ##
    # Absolute path of the directory containing the Python kernel.
    #
    # @return [String]
    #
    def core_dir
      File.expand_path('../../../core', __FILE__)
    end

    ##
    # Absolute path of the Python script that performs the actual tagging.
    #
    # @return [String]
    #
    def kernel
      File.join(core_dir, 'hotel_property_tagger_nl_en.py')
    end
  end # PropertyTagger
end # Opener
|
86
|
+
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module Opener
  class PropertyTagger
    ##
    # CLI wrapper around {Opener::PropertyTagger} using OptionParser.
    #
    # @!attribute [r] options
    #  @return [Hash]
    # @!attribute [r] option_parser
    #  @return [OptionParser]
    #
    class CLI
      attr_reader :options, :option_parser

      ##
      # Merges the given options into the defaults and builds the option
      # parser (-h/--help, -v/--version and -l/--log).
      #
      # NOTE(review): DEFAULT_OPTIONS is not defined in this file; it is
      # presumably provided by an enclosing namespace - confirm.
      #
      # @param [Hash] options
      #
      def initialize(options = {})
        @options = DEFAULT_OPTIONS.merge(options)

        @option_parser = OptionParser.new do |opts|
          # Matches the name of the executable shipped with this Gem.
          opts.program_name   = 'property-tagger'
          opts.summary_indent = ' '

          opts.on('-h', '--help', 'Shows this help message') do
            show_help
          end

          opts.on('-v', '--version', 'Shows the current version') do
            show_version
          end

          opts.on('-l', '--log', 'Enable logging to STDERR') do
            @options[:logging] = true
          end

          opts.separator <<-EOF

Examples:

  cat example.kaf | #{opts.program_name}    # Basic usage
  cat example.kaf | #{opts.program_name} -l # Logs information to STDERR
          EOF
        end
      end

      ##
      # Parses the command-line arguments in `options[:args]`, runs the tagger
      # on the input and prints the resulting STDOUT. When logging is enabled
      # any STDERR output is printed to STDERR as well. If the underlying
      # process fails the program is aborted with the STDERR output.
      #
      # @param [String] input
      #
      def run(input)
        option_parser.parse!(options[:args])

        tagger = PropertyTagger.new(options)

        stdout, stderr, process = tagger.run(input)

        if process.success?
          puts stdout

          if options[:logging] && !stderr.empty?
            STDERR.puts(stderr)
          end
        else
          abort stderr
        end
      end

      private

      ##
      # Shows the help message and exits the program.
      #
      def show_help
        abort option_parser.to_s
      end

      ##
      # Shows the version and exits the program.
      #
      def show_version
        abort "#{option_parser.program_name} v#{VERSION} on #{RUBY_DESCRIPTION}"
      end
    end # CLI
  end # PropertyTagger
end # Opener
|
@@ -0,0 +1,284 @@
|
|
1
|
+
|
2
|
+
input[type="text"], textarea
|
3
|
+
{
|
4
|
+
width: 500px;
|
5
|
+
}
|
6
|
+
|
7
|
+
body {
|
8
|
+
font-family: Helvetica, arial, sans-serif;
|
9
|
+
font-size: 14px;
|
10
|
+
line-height: 1.6;
|
11
|
+
padding-top: 10px;
|
12
|
+
padding-bottom: 10px;
|
13
|
+
background-color: white;
|
14
|
+
padding: 30px; }
|
15
|
+
|
16
|
+
body > *:first-child {
|
17
|
+
margin-top: 0 !important; }
|
18
|
+
body > *:last-child {
|
19
|
+
margin-bottom: 0 !important; }
|
20
|
+
|
21
|
+
a {
|
22
|
+
color: #4183C4; }
|
23
|
+
a.absent {
|
24
|
+
color: #cc0000; }
|
25
|
+
a.anchor {
|
26
|
+
display: block;
|
27
|
+
padding-left: 30px;
|
28
|
+
margin-left: -30px;
|
29
|
+
cursor: pointer;
|
30
|
+
position: absolute;
|
31
|
+
top: 0;
|
32
|
+
left: 0;
|
33
|
+
bottom: 0; }
|
34
|
+
|
35
|
+
h1, h2, h3, h4, h5, h6 {
|
36
|
+
margin: 20px 0 10px;
|
37
|
+
padding: 0;
|
38
|
+
font-weight: bold;
|
39
|
+
-webkit-font-smoothing: antialiased;
|
40
|
+
cursor: text;
|
41
|
+
position: relative; }
|
42
|
+
|
43
|
+
h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor, h5:hover a.anchor, h6:hover a.anchor {
|
44
|
+
background: url("../../images/modules/styleguide/para.png") no-repeat 10px center;
|
45
|
+
text-decoration: none; }
|
46
|
+
|
47
|
+
h1 tt, h1 code {
|
48
|
+
font-size: inherit; }
|
49
|
+
|
50
|
+
h2 tt, h2 code {
|
51
|
+
font-size: inherit; }
|
52
|
+
|
53
|
+
h3 tt, h3 code {
|
54
|
+
font-size: inherit; }
|
55
|
+
|
56
|
+
h4 tt, h4 code {
|
57
|
+
font-size: inherit; }
|
58
|
+
|
59
|
+
h5 tt, h5 code {
|
60
|
+
font-size: inherit; }
|
61
|
+
|
62
|
+
h6 tt, h6 code {
|
63
|
+
font-size: inherit; }
|
64
|
+
|
65
|
+
h1 {
|
66
|
+
font-size: 28px;
|
67
|
+
color: black; }
|
68
|
+
|
69
|
+
h2 {
|
70
|
+
font-size: 24px;
|
71
|
+
border-bottom: 1px solid #cccccc;
|
72
|
+
color: black; }
|
73
|
+
|
74
|
+
h3 {
|
75
|
+
font-size: 18px; }
|
76
|
+
|
77
|
+
h4 {
|
78
|
+
font-size: 16px; }
|
79
|
+
|
80
|
+
h5 {
|
81
|
+
font-size: 14px; }
|
82
|
+
|
83
|
+
h6 {
|
84
|
+
color: #777777;
|
85
|
+
font-size: 14px; }
|
86
|
+
|
87
|
+
p, blockquote, ul, ol, dl, li, table, pre {
|
88
|
+
margin: 15px 0; }
|
89
|
+
|
90
|
+
hr {
|
91
|
+
background: transparent url("../../images/modules/pulls/dirty-shade.png") repeat-x 0 0;
|
92
|
+
border: 0 none;
|
93
|
+
color: #cccccc;
|
94
|
+
height: 4px;
|
95
|
+
padding: 0; }
|
96
|
+
|
97
|
+
body > h2:first-child {
|
98
|
+
margin-top: 0;
|
99
|
+
padding-top: 0; }
|
100
|
+
body > h1:first-child {
|
101
|
+
margin-top: 0;
|
102
|
+
padding-top: 0; }
|
103
|
+
body > h1:first-child + h2 {
|
104
|
+
margin-top: 0;
|
105
|
+
padding-top: 0; }
|
106
|
+
body > h3:first-child, body > h4:first-child, body > h5:first-child, body > h6:first-child {
|
107
|
+
margin-top: 0;
|
108
|
+
padding-top: 0; }
|
109
|
+
|
110
|
+
a:first-child h1, a:first-child h2, a:first-child h3, a:first-child h4, a:first-child h5, a:first-child h6 {
|
111
|
+
margin-top: 0;
|
112
|
+
padding-top: 0; }
|
113
|
+
|
114
|
+
h1 p, h2 p, h3 p, h4 p, h5 p, h6 p {
|
115
|
+
margin-top: 0; }
|
116
|
+
|
117
|
+
li p.first {
|
118
|
+
display: inline-block; }
|
119
|
+
|
120
|
+
ul, ol {
|
121
|
+
padding-left: 30px; }
|
122
|
+
|
123
|
+
ul :first-child, ol :first-child {
|
124
|
+
margin-top: 0; }
|
125
|
+
|
126
|
+
ul :last-child, ol :last-child {
|
127
|
+
margin-bottom: 0; }
|
128
|
+
|
129
|
+
dl {
|
130
|
+
padding: 0; }
|
131
|
+
dl dt {
|
132
|
+
font-size: 14px;
|
133
|
+
font-weight: bold;
|
134
|
+
font-style: italic;
|
135
|
+
padding: 0;
|
136
|
+
margin: 15px 0 5px; }
|
137
|
+
dl dt:first-child {
|
138
|
+
padding: 0; }
|
139
|
+
dl dt > :first-child {
|
140
|
+
margin-top: 0; }
|
141
|
+
dl dt > :last-child {
|
142
|
+
margin-bottom: 0; }
|
143
|
+
dl dd {
|
144
|
+
margin: 0 0 15px;
|
145
|
+
padding: 0 15px; }
|
146
|
+
dl dd > :first-child {
|
147
|
+
margin-top: 0; }
|
148
|
+
dl dd > :last-child {
|
149
|
+
margin-bottom: 0; }
|
150
|
+
|
151
|
+
blockquote {
|
152
|
+
border-left: 4px solid #dddddd;
|
153
|
+
padding: 0 15px;
|
154
|
+
color: #777777; }
|
155
|
+
blockquote > :first-child {
|
156
|
+
margin-top: 0; }
|
157
|
+
blockquote > :last-child {
|
158
|
+
margin-bottom: 0; }
|
159
|
+
|
160
|
+
table {
|
161
|
+
padding: 0; }
|
162
|
+
table tr {
|
163
|
+
border-top: 1px solid #cccccc;
|
164
|
+
background-color: white;
|
165
|
+
margin: 0;
|
166
|
+
padding: 0; }
|
167
|
+
table tr:nth-child(2n) {
|
168
|
+
background-color: #f8f8f8; }
|
169
|
+
table tr th {
|
170
|
+
font-weight: bold;
|
171
|
+
border: 1px solid #cccccc;
|
172
|
+
text-align: left;
|
173
|
+
margin: 0;
|
174
|
+
padding: 6px 13px; }
|
175
|
+
table tr td {
|
176
|
+
border: 1px solid #cccccc;
|
177
|
+
text-align: left;
|
178
|
+
margin: 0;
|
179
|
+
padding: 6px 13px; }
|
180
|
+
table tr th :first-child, table tr td :first-child {
|
181
|
+
margin-top: 0; }
|
182
|
+
table tr th :last-child, table tr td :last-child {
|
183
|
+
margin-bottom: 0; }
|
184
|
+
|
185
|
+
img {
|
186
|
+
max-width: 100%; }
|
187
|
+
|
188
|
+
span.frame {
|
189
|
+
display: block;
|
190
|
+
overflow: hidden; }
|
191
|
+
span.frame > span {
|
192
|
+
border: 1px solid #dddddd;
|
193
|
+
display: block;
|
194
|
+
float: left;
|
195
|
+
overflow: hidden;
|
196
|
+
margin: 13px 0 0;
|
197
|
+
padding: 7px;
|
198
|
+
width: auto; }
|
199
|
+
span.frame span img {
|
200
|
+
display: block;
|
201
|
+
float: left; }
|
202
|
+
span.frame span span {
|
203
|
+
clear: both;
|
204
|
+
color: #333333;
|
205
|
+
display: block;
|
206
|
+
padding: 5px 0 0; }
|
207
|
+
span.align-center {
|
208
|
+
display: block;
|
209
|
+
overflow: hidden;
|
210
|
+
clear: both; }
|
211
|
+
span.align-center > span {
|
212
|
+
display: block;
|
213
|
+
overflow: hidden;
|
214
|
+
margin: 13px auto 0;
|
215
|
+
text-align: center; }
|
216
|
+
span.align-center span img {
|
217
|
+
margin: 0 auto;
|
218
|
+
text-align: center; }
|
219
|
+
span.align-right {
|
220
|
+
display: block;
|
221
|
+
overflow: hidden;
|
222
|
+
clear: both; }
|
223
|
+
span.align-right > span {
|
224
|
+
display: block;
|
225
|
+
overflow: hidden;
|
226
|
+
margin: 13px 0 0;
|
227
|
+
text-align: right; }
|
228
|
+
span.align-right span img {
|
229
|
+
margin: 0;
|
230
|
+
text-align: right; }
|
231
|
+
span.float-left {
|
232
|
+
display: block;
|
233
|
+
margin-right: 13px;
|
234
|
+
overflow: hidden;
|
235
|
+
float: left; }
|
236
|
+
span.float-left span {
|
237
|
+
margin: 13px 0 0; }
|
238
|
+
span.float-right {
|
239
|
+
display: block;
|
240
|
+
margin-left: 13px;
|
241
|
+
overflow: hidden;
|
242
|
+
float: right; }
|
243
|
+
span.float-right > span {
|
244
|
+
display: block;
|
245
|
+
overflow: hidden;
|
246
|
+
margin: 13px auto 0;
|
247
|
+
text-align: right; }
|
248
|
+
|
249
|
+
code, tt {
|
250
|
+
margin: 0 2px;
|
251
|
+
padding: 0 5px;
|
252
|
+
white-space: nowrap;
|
253
|
+
border: 1px solid #eaeaea;
|
254
|
+
background-color: #f8f8f8;
|
255
|
+
border-radius: 3px; }
|
256
|
+
|
257
|
+
pre code {
|
258
|
+
margin: 0;
|
259
|
+
padding: 0;
|
260
|
+
white-space: pre;
|
261
|
+
border: none;
|
262
|
+
background: transparent; }
|
263
|
+
|
264
|
+
.highlight pre {
|
265
|
+
background-color: #f8f8f8;
|
266
|
+
border: 1px solid #cccccc;
|
267
|
+
font-size: 13px;
|
268
|
+
line-height: 19px;
|
269
|
+
overflow: auto;
|
270
|
+
padding: 6px 10px;
|
271
|
+
border-radius: 3px; }
|
272
|
+
|
273
|
+
pre {
|
274
|
+
background-color: #f8f8f8;
|
275
|
+
border: 1px solid #cccccc;
|
276
|
+
font-size: 13px;
|
277
|
+
line-height: 19px;
|
278
|
+
overflow: auto;
|
279
|
+
padding: 6px 10px;
|
280
|
+
border-radius: 3px; }
|
281
|
+
pre code, pre tt {
|
282
|
+
background-color: transparent;
|
283
|
+
border: none; }
|
284
|
+
|