open_nlp 0.0.4-java → 0.0.5-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/open_nlp/chunker.rb +9 -2
- data/lib/open_nlp/parser.rb +0 -49
- data/lib/open_nlp/version.rb +1 -1
- metadata +2 -2
data/lib/open_nlp/chunker.rb
CHANGED
@@ -30,8 +30,15 @@ module OpenNlp
|
|
30
30
|
|
31
31
|
data.inject([]) do |acc, val|
|
32
32
|
chunk = val[2]
|
33
|
-
acc << [{val[0] => val[1]}] if chunk[0] == 'B'
|
34
|
-
|
33
|
+
acc << [{val[0] => val[1]}] if chunk[0] == 'B' # add token to chunk if it is a start of chunk
|
34
|
+
|
35
|
+
if chunk[0] == 'I'
|
36
|
+
if acc.last
|
37
|
+
acc.last << {val[0] => val[1]} # add token to chunk if it is a continuation of chunk
|
38
|
+
else
|
39
|
+
acc << [{val[0] => val[1]}] # add token to new chunk if no chunks exists
|
40
|
+
end
|
41
|
+
end
|
35
42
|
|
36
43
|
acc
|
37
44
|
end
|
data/lib/open_nlp/parser.rb
CHANGED
@@ -35,7 +35,6 @@ module OpenNlp
|
|
35
35
|
)
|
36
36
|
end
|
37
37
|
|
38
|
-
#code_tree @j_instance.parse(parse_obj)
|
39
38
|
Parser::Parse.new(@j_instance.parse(parse_obj))
|
40
39
|
end
|
41
40
|
|
@@ -49,53 +48,5 @@ module OpenNlp
|
|
49
48
|
|
50
49
|
offset
|
51
50
|
end
|
52
|
-
|
53
|
-
#def build_tree(parse_obj)
|
54
|
-
# span = parse_obj.getSpan
|
55
|
-
# start = span.getStart
|
56
|
-
# text = parse_obj.getText
|
57
|
-
# type = parse_obj.getType
|
58
|
-
#
|
59
|
-
# res = {}
|
60
|
-
# res[:type] = type unless type == Java::opennlp.tools.parser.AbstractBottomUpParser::TOK_NODE
|
61
|
-
#
|
62
|
-
# children = parse_obj.getChildren.inject([]) do |acc,c|
|
63
|
-
# s = c.span
|
64
|
-
#
|
65
|
-
# h = {}
|
66
|
-
#
|
67
|
-
# if start < s.getStart
|
68
|
-
# token = text[start..s.getStart-1]
|
69
|
-
# h[:token] = token unless token.strip.empty?
|
70
|
-
# end
|
71
|
-
#
|
72
|
-
# subtree = build_tree(c)
|
73
|
-
# h[:children] = subtree unless subtree.empty?
|
74
|
-
#
|
75
|
-
# start = s.getEnd
|
76
|
-
#
|
77
|
-
# acc << h
|
78
|
-
# acc
|
79
|
-
# end
|
80
|
-
#
|
81
|
-
# res[:token] = text[start..span.getEnd-1] if start < span.getEnd
|
82
|
-
#
|
83
|
-
# res[:children] = children unless children.empty?
|
84
|
-
#
|
85
|
-
# res
|
86
|
-
#end
|
87
|
-
|
88
|
-
def code_tree(parse_obj)
|
89
|
-
kids = parse_obj.getChildren
|
90
|
-
|
91
|
-
kids.inject([]) do |acc,kid|
|
92
|
-
data = {type: kid.getType, parent_type: parse_obj.getType, token: kid.toString}
|
93
|
-
subtree = code_tree(kid)
|
94
|
-
data[:children] = subtree unless subtree.empty?
|
95
|
-
acc << data
|
96
|
-
|
97
|
-
acc
|
98
|
-
end
|
99
|
-
end
|
100
51
|
end
|
101
52
|
end
|
data/lib/open_nlp/version.rb
CHANGED
metadata
CHANGED
@@ -2,14 +2,14 @@
|
|
2
2
|
name: open_nlp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.4
|
5
|
+
version: 0.0.5
|
6
6
|
platform: java
|
7
7
|
authors:
|
8
8
|
- Hck
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-09-
|
12
|
+
date: 2012-09-28 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: JRuby tools wrapper for Apache OpenNLP
|
15
15
|
email:
|