stanford_corenlp_xml_adapter 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d69c2dce1695642753623b33a96043d6a016568a
|
4
|
+
data.tar.gz: 46c31da9aac985cd9b44baa0caea8dbaef5f3606
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a48091dddb26039bda934f8736b2bb3d0ff2274073e341c9034e660ed9791127f3f65b157b0e8175fb7f71105e208893cf37ea3eddcd922c37143a27c9b954b5
|
7
|
+
data.tar.gz: fd6135d6b659e31d4c5296e7b0b145929e3855bc03d3ea47a85b810b31080a85721b0eeee53bd0f71a1a9d6c4ee761dd504c9e7a0cc453662b5da1a22776ffea
|
data/bin/bump-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
gem bump --version minor --remote --release
|
@@ -4,6 +4,8 @@ require "stanford_corenlp_xml_adapter/nokogiri_misc_mixins"
|
|
4
4
|
require "stanford_corenlp_xml_adapter/coreference"
|
5
5
|
require "stanford_corenlp_xml_adapter/dependency_parse"
|
6
6
|
|
7
|
+
include DependencyParse
|
8
|
+
|
7
9
|
class Nokogiri::XML::Document
|
8
10
|
include NokogiriMiscellaneousMixins
|
9
11
|
include Coreference
|
@@ -11,7 +13,7 @@ end
|
|
11
13
|
|
12
14
|
class Nokogiri::XML::Element
|
13
15
|
include NokogiriMiscellaneousMixins
|
14
|
-
include DependencyParse
|
16
|
+
#include DependencyParse
|
15
17
|
end
|
16
18
|
|
17
19
|
module StanfordCorenlpXmlAdapter
|
@@ -1,56 +1,66 @@
|
|
1
1
|
module DependencyParse
|
2
|
-
def basic_dependencies
|
3
|
-
dependencies_for_type 'basic-dependencies'
|
2
|
+
def basic_dependencies doc, sentence_index
|
3
|
+
dependencies_for_type doc, sentence_index, 'basic-dependencies'
|
4
4
|
end
|
5
5
|
|
6
|
-
def collapsed_dependencies
|
7
|
-
dependencies_for_type 'collapsed-dependencies'
|
6
|
+
def collapsed_dependencies doc, sentence_index
|
7
|
+
dependencies_for_type doc, sentence_index, 'collapsed-dependencies'
|
8
8
|
end
|
9
9
|
|
10
|
-
def collapsed_ccprocessed_dependencies
|
11
|
-
dependencies_for_type 'collapsed-ccprocessed-dependencies'
|
10
|
+
def collapsed_ccprocessed_dependencies doc, sentence_index
|
11
|
+
dependencies_for_type doc, sentence_index, 'collapsed-ccprocessed-dependencies'
|
12
12
|
end
|
13
13
|
|
14
|
-
def
|
15
|
-
dependencies_for_type '
|
14
|
+
def enhanced_dependencies doc, sentence_index
|
15
|
+
dependencies_for_type doc, sentence_index, 'enhanced-dependencies'
|
16
16
|
end
|
17
17
|
|
18
|
-
def
|
19
|
-
dependencies_for_type 'enhanced-dependencies'
|
18
|
+
def enhanced_plus_plus_dependencies doc, sentence_index
|
19
|
+
dependencies_for_type doc, sentence_index, 'enhanced-plus-plus-dependencies'
|
20
20
|
end
|
21
21
|
|
22
|
-
def
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
def dependencies_for_type type
|
22
|
+
def dependencies_for_type doc, sentence_index, type
|
23
|
+
sentence = doc.sentences[sentence_index]
|
24
|
+
sentence_tokens = sentence.tokens
|
27
25
|
dependency_parse_nok_to_blob(
|
28
|
-
|
26
|
+
sentence.xpath(".//dependencies[@type=\"#{type}\"]"),
|
27
|
+
sentence_tokens.map{|t| t.pos.text},
|
28
|
+
sentence_tokens.map{|t| t.ner.text},
|
29
29
|
)
|
30
30
|
end
|
31
31
|
|
32
|
-
def dependency_parse_nok_to_blob
|
33
|
-
|
32
|
+
def dependency_parse_nok_to_blob dep_parse_input, pos_tags_input, ner_tags_input
|
33
|
+
dep_parse_input
|
34
34
|
.children
|
35
35
|
.select{|dep| dep.name == 'dep'}
|
36
|
-
.
|
36
|
+
.each_with_index
|
37
|
+
.map{|dep| dependency_nok_to_blob(dep, pos_tags_input, ner_tags_input)}
|
37
38
|
end
|
38
39
|
|
39
|
-
def dependency_nok_to_blob
|
40
|
+
def dependency_nok_to_blob dep_parse_input, pos_tags_input, ner_tags_input
|
40
41
|
{
|
41
|
-
type:
|
42
|
-
governor: dependency_slice_nok_to_blob(
|
43
|
-
|
42
|
+
type: dep_parse_input.attributes['type'].value,
|
43
|
+
governor: dependency_slice_nok_to_blob(
|
44
|
+
'governor', dep_parse_input, pos_tags_input, ner_tags_input
|
45
|
+
),
|
46
|
+
dependent: dependency_slice_nok_to_blob(
|
47
|
+
'dependent', dep_parse_input, pos_tags_input, ner_tags_input
|
48
|
+
)
|
44
49
|
}
|
45
50
|
end
|
46
51
|
|
47
|
-
def dependency_slice_nok_to_blob name,
|
48
|
-
dep =
|
52
|
+
def dependency_slice_nok_to_blob name, dep_parse_input, pos_tags_input, ner_tags_input
|
53
|
+
dep = dep_parse_input.children
|
49
54
|
.select{|dep| dep.name == name}
|
50
55
|
.first
|
56
|
+
idx = dep.attributes['idx'].value.to_i
|
57
|
+
pos = idx > 0 ? pos_tags_input[idx - 1] : nil
|
58
|
+
ner = idx > 0 && ner_tags_input[idx - 1] != 'O' ? ner_tags_input[idx - 1] : nil
|
51
59
|
{
|
52
|
-
idx:
|
53
|
-
value: dep.text
|
60
|
+
idx: idx,
|
61
|
+
value: dep.text,
|
62
|
+
pos: pos,
|
63
|
+
ner: ner
|
54
64
|
}
|
55
65
|
end
|
56
66
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stanford_corenlp_xml_adapter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- joshweir
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-11-
|
11
|
+
date: 2018-11-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -181,6 +181,7 @@ files:
|
|
181
181
|
- LICENSE
|
182
182
|
- README.md
|
183
183
|
- Rakefile
|
184
|
+
- bin/bump-version
|
184
185
|
- bin/console
|
185
186
|
- bin/docker-web-dev
|
186
187
|
- bin/docker-web-dev-bash
|