opener-opinion-detector-base 2.0.1 → 2.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/core/python-scripts/README.md +78 -3
  3. data/core/python-scripts/classify_kaf_naf_file.py +94 -94
  4. data/core/python-scripts/models.cfg +1 -0
  5. data/core/python-scripts/scripts/config_manager.py +3 -0
  6. data/core/python-scripts/scripts/extract_features.py +0 -3
  7. data/core/python-scripts/scripts/relation_classifier.py +1 -1
  8. data/core/vendor/src/crfsuite/crfsuite.sln +42 -42
  9. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -26
  10. data/ext/hack/Rakefile +5 -2
  11. data/lib/opener/opinion_detectors/base.rb +19 -15
  12. data/lib/opener/opinion_detectors/base/version.rb +1 -1
  13. data/lib/opener/opinion_detectors/configuration_creator.rb +6 -8
  14. data/lib/opener/opinion_detectors/de.rb +1 -1
  15. data/lib/opener/opinion_detectors/es.rb +7 -0
  16. data/lib/opener/opinion_detectors/fr.rb +7 -0
  17. data/opener-opinion-detector-base.gemspec +0 -1
  18. data/pre_install_requirements.txt +3 -0
  19. metadata +41 -85
  20. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  21. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  22. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +0 -10
  23. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +0 -22
  24. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +0 -1
  25. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +0 -47
  26. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +0 -1
  27. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +0 -390
  28. data/core/site-packages/pre_build/KafNafParser/__init__.py +0 -14
  29. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +0 -125
  30. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +0 -52
  31. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +0 -78
  32. data/core/site-packages/pre_build/KafNafParser/entity_data.py +0 -59
  33. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +0 -41
  34. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +0 -2
  35. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +0 -205
  36. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +0 -309
  37. data/core/site-packages/pre_build/KafNafParser/features_data.py +0 -131
  38. data/core/site-packages/pre_build/KafNafParser/header_data.py +0 -127
  39. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +0 -211
  40. data/core/site-packages/pre_build/KafNafParser/references_data.py +0 -23
  41. data/core/site-packages/pre_build/KafNafParser/span_data.py +0 -63
  42. data/core/site-packages/pre_build/KafNafParser/term_data.py +0 -111
  43. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +0 -42
  44. data/core/site-packages/pre_build/KafNafParser/text_data.py +0 -99
  45. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +0 -10
  46. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +0 -14
  47. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +0 -1
  48. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +0 -23
  49. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +0 -1
  50. data/core/site-packages/pre_build/VUA_pylib/__init__.py +0 -1
  51. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +0 -1
  52. data/core/site-packages/pre_build/VUA_pylib/common/common.py +0 -28
  53. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +0 -1
  54. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +0 -156
  55. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +0 -1
  56. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +0 -121
  57. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +0 -1
  58. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +0 -72
  59. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +0 -10
  60. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +0 -7
  61. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +0 -1
  62. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +0 -11
  63. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +0 -1
  64. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +0 -165
  65. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +0 -439
  66. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +0 -7
  67. data/pre_build_requirements.txt +0 -3
@@ -1,14 +0,0 @@
1
- from KafNafParserMod import *
2
- from header_data import *
3
- from external_references_data import *
4
- from span_data import *
5
- from term_data import *
6
- from term_sentiment_data import *
7
- from text_data import *
8
- from entity_data import *
9
- from features_data import *
10
- from opinion_data import *
11
- from dependency_data import *
12
- from constituency_data import *
13
- from references_data import *
14
- from coreference_data import *
@@ -1,125 +0,0 @@
1
- from lxml import etree
2
- from lxml.objectify import dump
3
- from span_data import Cspan
4
-
5
-
6
class Cnonterminal:
    """Thin wrapper around the XML element of a non-terminal (``nt``) node."""

    def __init__(self, node=None):
        """Wrap ``node``; when it is None a fresh, empty ``nt`` element is created."""
        self.node = etree.Element('nt') if node is None else node

    def get_id(self):
        """Return the value of the ``id`` attribute (None when it is absent)."""
        return self.node.get('id')

    def get_label(self):
        """Return the value of the ``label`` attribute (None when it is absent)."""
        return self.node.get('label')

    def __str__(self):
        """Return the ``dump`` rendering of the wrapped element."""
        return dump(self.node)
21
-
22
-
23
-
24
class Cterminal:
    """Thin wrapper around the XML element of a terminal (``t``) node."""

    def __init__(self, node=None):
        """Wrap ``node``; when it is None a fresh, empty ``t`` element is created."""
        self.node = etree.Element('t') if node is None else node

    def get_id(self):
        """Return the value of the ``id`` attribute (None when it is absent)."""
        return self.node.get('id')

    def get_span(self):
        """Return a ``Cspan`` built from the first ``span`` child.

        ``find`` yields None when there is no such child; that None is handed
        to ``Cspan`` unchanged.
        """
        return Cspan(self.node.find('span'))

    def __str__(self):
        """Return the ``dump`` rendering of the wrapped element."""
        return dump(self.node)
40
-
41
class Cedge:
    """Thin wrapper around the XML element of an ``edge`` between two tree nodes."""

    def __init__(self, node=None):
        """Wrap ``node``; when it is None a fresh, empty ``edge`` element is created."""
        self.node = etree.Element('edge') if node is None else node

    def __str__(self):
        """Return the ``dump`` rendering of the wrapped element."""
        return dump(self.node)

    def get_from(self):
        """Return the value of the ``from`` attribute (None when it is absent)."""
        return self.node.get('from')

    def get_to(self):
        """Return the value of the ``to`` attribute (None when it is absent)."""
        return self.node.get('to')
56
-
57
-
58
-
59
class Ctree:
    """Wrapper around a ``tree`` element exposing its ``nt``, ``t`` and ``edge`` children."""

    def __init__(self, node=None):
        """Wrap ``node``; when it is None a fresh, empty ``tree`` element is created."""
        self.node = etree.Element('tree') if node is None else node

    def __str__(self):
        """Return the ``dump`` rendering of the wrapped element."""
        return dump(self.node)

    ## For getting non terminals
    def __get_nt_nodes(self):
        """Yield the raw ``nt`` child elements, in document order."""
        for nt_node in self.node.findall('nt'):
            yield nt_node

    def get_non_terminals(self):
        """Yield one ``Cnonterminal`` wrapper per ``nt`` child."""
        for nt_node in self.__get_nt_nodes():
            yield Cnonterminal(nt_node)
    ##################################

    ## For getting terminals
    def __get_t_nodes(self):
        """Yield the raw ``t`` child elements, in document order."""
        for t_node in self.node.findall('t'):
            yield t_node

    def get_terminals(self):
        """Yield one ``Cterminal`` wrapper per ``t`` child."""
        for t_node in self.__get_t_nodes():
            yield Cterminal(t_node)
    ##################################

    ## For getting edges
    def __get_edge_nodes(self):
        """Yield the raw ``edge`` child elements, in document order."""
        for edge_node in self.node.findall('edge'):
            yield edge_node

    def get_edges(self):
        """Yield one ``Cedge`` wrapper per ``edge`` child."""
        for edge_node in self.__get_edge_nodes():
            yield Cedge(edge_node)
    ##################################
99
-
100
-
101
-
102
class Cconstituency:
    """Wrapper around the ``constituency`` layer element; iterates over its trees."""

    def __init__(self, node=None):
        """Wrap ``node``; when it is None a fresh ``constituency`` element is created."""
        # NOTE(review): the sibling external-reference wrapper uses 'NAF/KAF';
        # 'NAF/NAF' may be a typo. The value is kept as-is because callers may
        # compare against it - confirm before changing.
        self.type = 'NAF/NAF'
        self.node = etree.Element('constituency') if node is None else node

    def to_kaf(self):
        """No-op: this wrapper changes nothing when converting to KAF."""
        pass

    def to_naf(self):
        """No-op: this wrapper changes nothing when converting to NAF."""
        pass

    def __get_tree_nodes(self):
        """Yield the raw ``tree`` child elements, in document order."""
        for tree_node in self.node.findall('tree'):
            yield tree_node

    def get_trees(self):
        """Yield one ``Ctree`` wrapper per ``tree`` child."""
        for tree_node in self.__get_tree_nodes():
            yield Ctree(tree_node)

    def __str__(self):
        """Return the ``dump`` rendering of the wrapped element."""
        return dump(self.node)
@@ -1,52 +0,0 @@
1
- from lxml import etree
2
- from span_data import Cspan
3
-
4
class Ccoreference:
    """Wrapper around a single ``coref`` element in either NAF or KAF flavour."""

    def __init__(self, node=None, type='NAF'):
        """Wrap ``node`` (a new ``coref`` element when None) and remember ``type``.

        ``type`` selects which attribute holds the identifier: ``'NAF'`` reads
        ``id`` and ``'KAF'`` reads ``coid``.
        """
        self.type = type
        self.node = etree.Element('coref') if node is None else node

    def get_id(self):
        """Return the identifier attribute for this flavour.

        Returns None when the attribute is absent or when ``self.type`` is
        neither ``'NAF'`` nor ``'KAF'``.
        """
        if self.type == 'NAF':
            return self.node.get('id')
        elif self.type == 'KAF':
            return self.node.get('coid')

    def get_spans(self):
        """Yield one ``Cspan`` wrapper per ``span`` child."""
        for node_span in self.node.findall('span'):
            yield Cspan(node_span)
21
-
22
-
23
-
24
class Ccoreferences:
    """Wrapper around the ``coreferences`` layer element (NAF or KAF flavour)."""

    def __init__(self, node=None, type='NAF'):
        """Wrap ``node`` (a new ``coreferences`` element when None) and remember ``type``."""
        self.type = type
        self.node = etree.Element('coreferences') if node is None else node

    def __get_corefs_nodes(self):
        """Yield the raw ``coref`` child elements, in document order."""
        for coref_node in self.node.findall('coref'):
            yield coref_node

    def __rename_attribute(self, old_name, new_name):
        """Move the value of ``old_name`` to ``new_name`` on every ``coref`` child.

        Children that do not carry ``old_name`` are left untouched instead of
        raising while the attribute is being copied or deleted.
        """
        for coref_node in self.__get_corefs_nodes():
            value = coref_node.attrib.pop(old_name, None)
            if value is not None:
                coref_node.set(new_name, value)

    def get_corefs(self):
        """Yield one ``Ccoreference`` per ``coref`` child, sharing this layer's type."""
        for coref_node in self.__get_corefs_nodes():
            yield Ccoreference(coref_node, self.type)

    def to_kaf(self):
        """Convert a NAF layer to KAF by renaming ``id`` to ``coid``.

        ``self.type`` is updated afterwards so that wrappers produced by
        ``get_corefs`` read the renamed attribute and a repeated call is a no-op.
        """
        if self.type == 'NAF':
            self.__rename_attribute('id', 'coid')
            self.type = 'KAF'

    def to_naf(self):
        """Convert a KAF layer to NAF by renaming ``coid`` to ``id``.

        ``self.type`` is updated afterwards so that wrappers produced by
        ``get_corefs`` read the renamed attribute and a repeated call is a no-op.
        """
        if self.type == 'KAF':
            self.__rename_attribute('coid', 'id')
            self.type = 'NAF'
51
-
52
-
@@ -1,78 +0,0 @@
1
- from lxml import etree
2
- #from lxml.objectify import dump
3
-
4
-
5
class Cdependency:
    """Wrapper around a single ``dep`` element (one dependency relation)."""

    def __init__(self, node=None):
        """Wrap ``node``; when it is None a fresh, empty ``dep`` element is created."""
        self.node = etree.Element('dep') if node is None else node
        # Comment node attached through set_comment(); None until that happens,
        # so get_node_comment() no longer fails on a never-assigned attribute.
        self.node_comment = None

    @staticmethod
    def _sanitize_comment(text):
        """Return ``text`` made safe for use as the body of an XML comment.

        XML comments may not contain ``--`` nor end with ``-``. A single
        ``replace`` pass leaves ``--`` behind for runs of three or more dashes,
        so the substitution is repeated until none is left.
        """
        while '--' in text:
            text = text.replace('--', '- -')
        if text.endswith('-'):
            text += ' '
        return text

    def get_node_comment(self):
        """Return the comment node added by ``set_comment`` (None if there is none)."""
        return self.node_comment

    def get_node(self):
        """Return the wrapped element."""
        return self.node

    def get_from(self):
        """Return the value of the ``from`` attribute (None when it is absent)."""
        return self.node.get('from')

    def get_to(self):
        """Return the value of the ``to`` attribute (None when it is absent)."""
        return self.node.get('to')

    def get_function(self):
        """Return the value of the ``rfunc`` attribute (None when it is absent)."""
        return self.node.get('rfunc')

    def set_from(self, f):
        """Set the ``from`` attribute to ``f``."""
        self.node.set('from', f)

    def set_to(self, t):
        """Set the ``to`` attribute to ``t``."""
        self.node.set('to', t)

    def set_function(self, f):
        """Set the ``rfunc`` attribute to ``f``."""
        self.node.set('rfunc', f)

    def set_comment(self, c):
        """Insert ``c`` as an XML comment in front of the element's other children."""
        comment = etree.Comment(self._sanitize_comment(c))
        self.node.insert(0, comment)
        self.node_comment = comment

    def __str__(self):
        """Return the ``dump`` rendering of the wrapped element."""
        # The module-level import of dump is commented out in this file, so it
        # is imported here to keep __str__ from raising NameError.
        from lxml.objectify import dump
        return dump(self.node)
44
-
45
-
46
-
47
class Cdependencies:
    """Wrapper around the ``deps`` layer element holding ``dep`` children."""

    def __init__(self, node=None):
        """Wrap ``node``; when it is None a fresh, empty ``deps`` element is created."""
        self.node = etree.Element('deps') if node is None else node

    def get_node(self):
        """Return the wrapped element."""
        return self.node

    def to_kaf(self):
        """No-op: this wrapper changes nothing when converting to KAF."""
        pass

    def to_naf(self):
        """No-op: this wrapper changes nothing when converting to NAF."""
        pass

    def __str__(self):
        """Return the ``dump`` rendering of the wrapped element."""
        # The module-level import of dump is commented out in this file, so it
        # is imported here to keep __str__ from raising NameError.
        from lxml.objectify import dump
        return dump(self.node)

    def __get_node_deps(self):
        """Yield the raw ``dep`` child elements, in document order."""
        for node_dep in self.node.findall('dep'):
            yield node_dep

    def get_dependencies(self):
        """Yield one ``Cdependency`` wrapper per ``dep`` child."""
        for node in self.__get_node_deps():
            yield Cdependency(node)

    def add_dependency(self, my_dep):
        """Append the element returned by ``my_dep.get_node()`` to this layer."""
        self.node.append(my_dep.get_node())
78
-
@@ -1,59 +0,0 @@
1
- ## Modified for KAF NAF adaptation
2
- from lxml import etree
3
- from lxml.objectify import dump
4
- from references_data import *
5
-
6
-
7
class Centity:
    """Wrapper around a single ``entity`` element in either NAF or KAF flavour."""

    def __init__(self, node=None, type='NAF'):
        """Wrap ``node`` (a new ``entity`` element when None) and remember ``type``.

        ``type`` selects which attribute holds the identifier: ``'NAF'`` reads
        ``id`` and ``'KAF'`` reads ``eid``.
        """
        self.type = type
        self.node = etree.Element('entity') if node is None else node

    def get_id(self):
        """Return the identifier attribute for this flavour.

        Returns None when the attribute is absent or when ``self.type`` is
        neither ``'NAF'`` nor ``'KAF'``.
        """
        if self.type == 'NAF':
            return self.node.get('id')
        elif self.type == 'KAF':
            return self.node.get('eid')

    def get_type(self):
        """Return the element's ``type`` attribute (not the NAF/KAF flavour)."""
        return self.node.get('type')

    def get_references(self):
        """Yield one ``Creferences`` wrapper per ``references`` child."""
        for ref_node in self.node.findall('references'):
            yield Creferences(ref_node)
27
-
28
class Centities:
    """Wrapper around the ``entities`` layer element (NAF or KAF flavour)."""

    def __init__(self, node=None, type='NAF'):
        """Wrap ``node`` (a new ``entities`` element when None) and remember ``type``."""
        self.type = type
        self.node = etree.Element('entities') if node is None else node

    def __rename_attribute(self, old_name, new_name):
        """Move the value of ``old_name`` to ``new_name`` on every ``entity`` child.

        Children that do not carry ``old_name`` are left untouched instead of
        raising while the attribute is being copied or deleted.
        """
        for ent_node in self.__get_entity_nodes():
            value = ent_node.attrib.pop(old_name, None)
            if value is not None:
                ent_node.set(new_name, value)

    def to_kaf(self):
        """Convert a NAF layer to KAF by renaming ``id`` to ``eid``.

        ``self.type`` is updated afterwards so that the wrappers produced by
        iterating this object read the renamed attribute and a repeated call
        is a no-op.
        """
        if self.type == 'NAF':
            self.__rename_attribute('id', 'eid')
            self.type = 'KAF'

    def to_naf(self):
        """Convert a KAF layer to NAF by renaming ``eid`` to ``id``.

        ``self.type`` is updated afterwards so that the wrappers produced by
        iterating this object read the renamed attribute and a repeated call
        is a no-op.
        """
        if self.type == 'KAF':
            self.__rename_attribute('eid', 'id')
            self.type = 'NAF'

    def __get_entity_nodes(self):
        """Yield the raw ``entity`` child elements, in document order."""
        for ent_node in self.node.findall('entity'):
            yield ent_node

    def __iter__(self):
        """Yield one ``Centity`` per ``entity`` child, sharing this layer's type."""
        for ent_node in self.__get_entity_nodes():
            yield Centity(ent_node, self.type)

    def __str__(self):
        """Return the ``dump`` rendering of the wrapped element."""
        return dump(self.node)
@@ -1,41 +0,0 @@
1
- # included modification for KAF/NAF
2
- from term_sentiment_data import Cterm_sentiment
3
- from lxml import etree
4
-
5
class CexternalReference:
    """Wrapper around a single ``externalRef`` element."""

    def __init__(self, node=None):
        """Wrap ``node``; when it is None a fresh ``externalRef`` element is created."""
        self.type = 'NAF/KAF'
        self.node = etree.Element('externalRef') if node is None else node

    def get_node(self):
        """Return the wrapped element."""
        return self.node

    def set_resource(self, resource):
        """Set the ``resource`` attribute to ``resource``."""
        self.node.set('resource', resource)

    def set_confidence(self, confidence):
        """Set the ``confidence`` attribute to ``confidence``."""
        self.node.set('confidence', confidence)

    def set_reference(self, reference):
        """Set the ``reference`` attribute to ``reference``."""
        self.node.set('reference', reference)
25
-
26
-
27
class CexternalReferences:
    """Wrapper around an ``externalReferences`` container element."""

    def __init__(self, node=None):
        """Wrap ``node``; when it is None a fresh ``externalReferences`` element is created."""
        self.node = etree.Element('externalReferences') if node is None else node

    def add_external_reference(self, ext_ref):
        """Append the element returned by ``ext_ref.get_node()`` to this container."""
        self.node.append(ext_ref.get_node())

    def get_node(self):
        """Return the wrapped element."""
        return self.node
39
-
40
-
41
-
@@ -1,2 +0,0 @@
1
- from dependency import *
2
- from constituency import *
@@ -1,205 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- from operator import itemgetter
4
-
5
- '''
6
- Extract information from the constituent layer of a NAF file
7
- '''
8
-
9
class Cconstituency_extractor:
    """Index the constituency trees of a KAF/NAF object for path and chunk queries.

    Attributes built by the constructor:

    * ``terminals``: terminal id -> list of term ids in its span
    * ``terminal_for_term``: term id -> terminal id
    * ``label_for_nonter``: non-terminal id -> label
    * ``reachable_from``: node id -> list of node ids reached by its edges
    * ``paths_for_terminal``: terminal id -> list of paths; each path lists the
      nodes reached by following the edges from the terminal (the terminal
      itself is only present when it has no outgoing edge)
    * ``terms_subsumed_by_nonter``: node id -> set of term ids of every terminal
      whose paths go through that node
    """

    def __init__(self, knaf_obj):
        """Read every tree of ``knaf_obj`` and precompute the path and span tables.

        ``knaf_obj`` must provide ``get_trees()``; see ``extract_info_from_naf``
        for the methods used on the trees.
        """
        self.naf = knaf_obj
        self.terminals = {}          # terminal id --> list term ids
        self.terminal_for_term = {}  # term id --> terminal id
        self.label_for_nonter = {}   # nonter --> label
        self.reachable_from = {}     # node_from --> [nodeto1, nodeto2...]

        self.extract_info_from_naf(knaf_obj)

        # All the paths that can be followed from each terminal id.
        self.paths_for_terminal = {}
        for terminal_id in self.terminals:
            self.paths_for_terminal[terminal_id] = self.__expand_node(terminal_id, False)

        # For each node on some path, the term ids it subsumes,
        # e.g. ['nonter12'] = set(['t1', 't2', 't3', 't4'])
        self.terms_subsumed_by_nonter = {}
        for terminal_id, span_terms in self.terminals.items():
            for path in self.paths_for_terminal[terminal_id]:
                for nonter in path:
                    self.terms_subsumed_by_nonter.setdefault(nonter, set()).update(span_terms)

    def __first_path_for_term(self, termid):
        """Return the first stored path for ``termid``, or None when there is none."""
        paths = self.paths_for_terminal.get(self.terminal_for_term.get(termid))
        return paths[0] if paths else None

    def __labels_for(self, nodes):
        """Return the label of every node in ``nodes`` (None for unlabelled nodes)."""
        return [self.label_for_nonter.get(node) for node in nodes]

    def get_deepest_phrases(self):
        """Group terminals by the second node of their paths; returns None.

        NOTE(review): the grouping is computed but never returned or stored,
        so this method looks unfinished. The None return value is kept for
        compatibility. Paths with fewer than two nodes are skipped rather than
        raising IndexError.
        """
        all_nonter = set()
        for terminal in self.terminals:
            for path in self.paths_for_terminal[terminal]:
                if len(path) > 1:
                    all_nonter.add(path[1])

        ter_for_nonter = {}
        for nonter in all_nonter:
            for terminal in self.terminals:
                for path in self.paths_for_terminal[terminal]:
                    if nonter in path:
                        ter_for_nonter.setdefault(nonter, []).append(terminal)

    ### Returns the label of the deepest phrase for the term id (termid as in the term layer)
    def get_deepest_phrase_for_termid(self, termid):
        """Return ``(label, sorted term ids)`` of the second node on the term's first path.

        Returns ``(None, [])`` when ``termid`` is unknown or when the path has
        fewer than two nodes (which used to raise IndexError).
        """
        terminal_id = self.terminal_for_term.get(termid)
        if terminal_id is None:
            return None, []
        first_path = self.paths_for_terminal[terminal_id][0]
        if len(first_path) < 2:
            return None, []
        first_phrase_id = first_path[1]
        label = self.label_for_nonter.get(first_phrase_id)
        subsumed = self.terms_subsumed_by_nonter.get(first_phrase_id, [])
        return label, sorted(subsumed)

    def get_least_common_subsumer(self, from_tid, to_tid):
        """Return the node shared by both terms' first paths that is closest to them.

        Closeness is the sum of the node's positions in the two paths. Returns
        None when either term id is unknown or the paths share no node.
        """
        path_from = self.__first_path_for_term(from_tid)
        path_to = self.__first_path_for_term(to_tid)
        if path_from is None or path_to is None:
            return None

        # First position of every node of the second path.
        position_in_to = {}
        for position, node in enumerate(path_to):
            position_in_to.setdefault(node, position)

        # Walking path_from in order makes ties resolve deterministically
        # instead of depending on set iteration order.
        best_node = None
        best_cost = None
        for position, node in enumerate(path_from):
            if node in position_in_to:
                cost = position + position_in_to[node]
                if best_cost is None or cost < best_cost:
                    best_node, best_cost = node, cost
        return best_node

    def get_path_from_to(self, from_tid, to_tid):
        """Return the labels along the path from one term up to the common node and down to the other.

        The path follows the first stored path of ``from_tid`` up to (and
        including) the least common subsumer, then the first stored path of
        ``to_tid`` in reverse, starting below that subsumer. Returns None when
        either term id is unknown or the two paths share no node.
        """
        shortest_subsumer = self.get_least_common_subsumer(from_tid, to_tid)
        if shortest_subsumer is None:
            return None

        path_from = self.__first_path_for_term(from_tid)
        path_to = self.__first_path_for_term(to_tid)

        upward = path_from[:path_from.index(shortest_subsumer) + 1]
        downward = path_to[:path_to.index(shortest_subsumer)][::-1]
        return self.__labels_for(upward + downward)

    def get_path_for_termid(self, termid):
        """Return the labels along the first stored path of ``termid`` (None if unknown)."""
        path = self.__first_path_for_term(termid)
        if path is None:
            return None
        return self.__labels_for(path)

    def extract_info_from_naf(self, knaf_obj):
        """Fill the terminal, label and edge tables from the trees of ``knaf_obj``.

        Uses ``get_terminals()``, ``get_non_terminals()`` and ``get_edges()``
        on every tree. ``paths_for_terminal`` is reset to an empty dict; the
        other tables are added to, not cleared.
        """
        ## Generated internally
        # For each terminal node, a list of paths through all the edges
        self.paths_for_terminal = {}
        for tree in knaf_obj.get_trees():
            for terminal in tree.get_terminals():
                ter_id = terminal.get_id()
                span_ids = terminal.get_span().get_span_ids()
                self.terminals[ter_id] = span_ids
                for this_id in span_ids:
                    self.terminal_for_term[this_id] = ter_id

            for non_terminal in tree.get_non_terminals():
                self.label_for_nonter[non_terminal.get_id()] = non_terminal.get_label()

            for edge in tree.get_edges():
                self.reachable_from.setdefault(edge.get_from(), []).append(edge.get_to())

    ## Recursive function
    ## Propagates the node through all the relations extracted from the edges information
    ## It returns a list of lists, one for each path
    ## include_this_node is used for avoiding the first node
    def __expand_node(self, node, include_this_node=True):
        """Return every path obtained by following the edges that start at ``node``.

        A node without outgoing edges yields the single path ``[node]``
        regardless of ``include_this_node``.
        """
        possible_nodes = self.reachable_from.get(node, [])
        if not possible_nodes:
            return [[node]]

        paths = []
        for possible_node in possible_nodes:
            for path in self.__expand_node(possible_node):
                if include_this_node:
                    path.insert(0, node)
                paths.append(path)
        return paths

    def get_chunks(self, chunk_type):
        """Yield the sorted term ids subsumed by each node labelled ``chunk_type``."""
        for nonter, this_type in self.label_for_nonter.items():
            if this_type == chunk_type:
                subsumed = self.terms_subsumed_by_nonter.get(nonter)
                if subsumed is not None:
                    yield sorted(subsumed)

    def get_all_chunks_for_term(self, termid):
        """Yield ``(label, sorted term ids)`` for every node on every path of ``termid``.

        Yields nothing when ``termid`` is unknown; unlabelled nodes are
        reported with a None label.
        """
        paths = self.paths_for_terminal.get(self.terminal_for_term.get(termid), [])
        for path in paths:
            for node in path:
                this_type = self.label_for_nonter.get(node)
                subsumed = self.terms_subsumed_by_nonter.get(node)
                if subsumed is not None:
                    yield this_type, sorted(subsumed)
205
-