opener-opinion-detector-base 2.0.1 → 2.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/core/python-scripts/README.md +78 -3
  3. data/core/python-scripts/classify_kaf_naf_file.py +94 -94
  4. data/core/python-scripts/models.cfg +1 -0
  5. data/core/python-scripts/scripts/config_manager.py +3 -0
  6. data/core/python-scripts/scripts/extract_features.py +0 -3
  7. data/core/python-scripts/scripts/relation_classifier.py +1 -1
  8. data/core/vendor/src/crfsuite/crfsuite.sln +42 -42
  9. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -26
  10. data/ext/hack/Rakefile +5 -2
  11. data/lib/opener/opinion_detectors/base.rb +19 -15
  12. data/lib/opener/opinion_detectors/base/version.rb +1 -1
  13. data/lib/opener/opinion_detectors/configuration_creator.rb +6 -8
  14. data/lib/opener/opinion_detectors/de.rb +1 -1
  15. data/lib/opener/opinion_detectors/es.rb +7 -0
  16. data/lib/opener/opinion_detectors/fr.rb +7 -0
  17. data/opener-opinion-detector-base.gemspec +0 -1
  18. data/pre_install_requirements.txt +3 -0
  19. metadata +41 -85
  20. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  21. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  22. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +0 -10
  23. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +0 -22
  24. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +0 -1
  25. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +0 -47
  26. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +0 -1
  27. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +0 -390
  28. data/core/site-packages/pre_build/KafNafParser/__init__.py +0 -14
  29. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +0 -125
  30. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +0 -52
  31. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +0 -78
  32. data/core/site-packages/pre_build/KafNafParser/entity_data.py +0 -59
  33. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +0 -41
  34. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +0 -2
  35. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +0 -205
  36. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +0 -309
  37. data/core/site-packages/pre_build/KafNafParser/features_data.py +0 -131
  38. data/core/site-packages/pre_build/KafNafParser/header_data.py +0 -127
  39. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +0 -211
  40. data/core/site-packages/pre_build/KafNafParser/references_data.py +0 -23
  41. data/core/site-packages/pre_build/KafNafParser/span_data.py +0 -63
  42. data/core/site-packages/pre_build/KafNafParser/term_data.py +0 -111
  43. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +0 -42
  44. data/core/site-packages/pre_build/KafNafParser/text_data.py +0 -99
  45. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +0 -10
  46. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +0 -14
  47. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +0 -1
  48. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +0 -23
  49. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +0 -1
  50. data/core/site-packages/pre_build/VUA_pylib/__init__.py +0 -1
  51. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +0 -1
  52. data/core/site-packages/pre_build/VUA_pylib/common/common.py +0 -28
  53. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +0 -1
  54. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +0 -156
  55. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +0 -1
  56. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +0 -121
  57. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +0 -1
  58. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +0 -72
  59. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +0 -10
  60. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +0 -7
  61. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +0 -1
  62. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +0 -11
  63. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +0 -1
  64. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +0 -165
  65. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +0 -439
  66. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +0 -7
  67. data/pre_build_requirements.txt +0 -3
@@ -1,99 +0,0 @@
1
- # included code for NAF/KAF
2
-
3
- from lxml import etree
4
-
5
-
6
- class Cwf:
7
- def __init__(self,node=None,type='NAF'):
8
- self.type = type
9
- ##self.id = '' self.sent = '' self.para = '' self.page = '' self.offset = '' self.lenght = '' s
10
- if node is None:
11
- self.node = etree.Element('wf')
12
- else:
13
- self.node = node
14
-
15
- def get_node(self):
16
- return self.node
17
-
18
- def set_id(self,this_id):
19
- if self.type == 'NAF':
20
- return self.node.set('id',this_id)
21
- elif self.type == 'KAF':
22
- return self.node.set('wid',this_id)
23
-
24
- def get_id(self):
25
- if self.type == 'NAF':
26
- return self.node.get('id')
27
- elif self.type == 'KAF':
28
- return self.node.get('wid')
29
-
30
- def set_text(self,this_text):
31
- self.node.text = this_text
32
-
33
- def get_text(self):
34
- return self.node.text
35
-
36
- def set_sent(self,this_sent):
37
- self.node.set('sent',this_sent)
38
-
39
- def get_sent(self):
40
- return self.node.get('sent')
41
-
42
-
43
- class Ctext:
44
- def __init__(self,node=None,type='NAF'):
45
- self.idx = {}
46
- self.type = type
47
- if node is None:
48
- self.node = etree.Element('text')
49
- else:
50
- self.node = node
51
- for wf_node in self.__get_wf_nodes():
52
- if self.type == 'NAF': label_id = 'id'
53
- elif self.type == 'KAF': label_id = 'wid'
54
- self.idx[wf_node.get(label_id)] = wf_node
55
-
56
- def get_node(self):
57
- return self.node
58
-
59
- def to_kaf(self):
60
- if self.type == 'NAF':
61
- self.type = 'KAF'
62
- for node in self.__get_wf_nodes():
63
- node.set('wid',node.get('id'))
64
- del node.attrib['id']
65
-
66
- def to_naf(self):
67
- if self.type == 'KAF':
68
- self.type = 'NAF'
69
- for node in self.__get_wf_nodes():
70
- node.set('id',node.get('wid'))
71
- del node.attrib['wid']
72
-
73
- def __get_wf_nodes(self):
74
- for wf_node in self.node.findall('wf'):
75
- yield wf_node
76
-
77
- def __iter__(self):
78
- for wf_node in self.__get_wf_nodes():
79
- yield Cwf(node=wf_node,type=self.type)
80
-
81
- def get_wf(self,token_id):
82
- wf_node = self.idx.get(token_id)
83
- if wf_node is not None:
84
- return Cwf(node=wf_node,type=self.type)
85
- else:
86
- return None
87
-
88
- def add_wf(self,wf_obj):
89
- self.node.append(wf_obj.get_node())
90
-
91
- def remove_tokens_of_sentence(self,sentence_id):
92
- nodes_to_remove = set()
93
- for wf in self:
94
- if wf.get_sent() == sentence_id:
95
- nodes_to_remove.add(wf.get_node())
96
-
97
- for node in nodes_to_remove:
98
- self.node.remove(node)
99
-
@@ -1,10 +0,0 @@
1
- Metadata-Version: 1.0
2
- Name: VUA-pylib
3
- Version: 1.5
4
- Summary: Various KAF / NAF python helpers
5
- Home-page: https://github.com/cltl/VUA_pylib
6
- Author: Ruben Izquierdo
7
- Author-email: r.izquierdobevia@vu.nl
8
- License: UNKNOWN
9
- Description: UNKNOWN
10
- Platform: UNKNOWN
@@ -1,14 +0,0 @@
1
- README
2
- VUA_pylib/__init__.py
3
- VUA_pylib.egg-info/PKG-INFO
4
- VUA_pylib.egg-info/SOURCES.txt
5
- VUA_pylib.egg-info/dependency_links.txt
6
- VUA_pylib.egg-info/top_level.txt
7
- VUA_pylib/common/__init__.py
8
- VUA_pylib/common/common.py
9
- VUA_pylib/corpus_reader/__init__.py
10
- VUA_pylib/corpus_reader/google_web_nl.py
11
- VUA_pylib/io_utils/__init__.py
12
- VUA_pylib/io_utils/feature_file.py
13
- VUA_pylib/lexicon/__init__.py
14
- VUA_pylib/lexicon/lexicon.py
@@ -1,23 +0,0 @@
1
- ../VUA_pylib/__init__.py
2
- ../VUA_pylib/lexicon/__init__.py
3
- ../VUA_pylib/lexicon/lexicon.py
4
- ../VUA_pylib/common/__init__.py
5
- ../VUA_pylib/common/common.py
6
- ../VUA_pylib/io_utils/__init__.py
7
- ../VUA_pylib/io_utils/feature_file.py
8
- ../VUA_pylib/corpus_reader/__init__.py
9
- ../VUA_pylib/corpus_reader/google_web_nl.py
10
- ../VUA_pylib/__init__.pyc
11
- ../VUA_pylib/lexicon/__init__.pyc
12
- ../VUA_pylib/lexicon/lexicon.pyc
13
- ../VUA_pylib/common/__init__.pyc
14
- ../VUA_pylib/common/common.pyc
15
- ../VUA_pylib/io_utils/__init__.pyc
16
- ../VUA_pylib/io_utils/feature_file.pyc
17
- ../VUA_pylib/corpus_reader/__init__.pyc
18
- ../VUA_pylib/corpus_reader/google_web_nl.pyc
19
- ./
20
- dependency_links.txt
21
- PKG-INFO
22
- SOURCES.txt
23
- top_level.txt
@@ -1 +0,0 @@
1
- from common import *
@@ -1,28 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- from operator import itemgetter
4
-
5
- # Get the max (key,count) from a dict like my_dict = {'a':20,'b':1,'c':50}
6
- # It will return --> (c,50)
7
- def get_max_distr_dict(my_dict):
8
- vect = my_dict.items()
9
- if len(vect) !=0:
10
- vect.sort(key=itemgetter(1),reverse=True)
11
- return vect[0]
12
- return None
13
-
14
- def normalize_pos(pos):
15
- pos = pos.lower()
16
- new_pos = pos
17
- if pos in ['adj','a'] or pos[0:2]=='jj':
18
- new_pos = 'a'
19
- elif pos in ['adverb','r'] or pos[0:2]=='rb':
20
- new_pos = 'r'
21
- elif pos in ['anypos']:
22
- new_pos = '*'
23
- elif pos in ['noun','n'] or pos[0:2]=='nn' or pos[0:2]=='np':
24
- new_pos = 'n'
25
- elif pos in ['verb','v'] or pos[0]=='v':
26
- new_pos = 'v'
27
- return new_pos
28
-
@@ -1 +0,0 @@
1
- from google_web_nl import *
@@ -1,156 +0,0 @@
1
- import urllib2
2
- import urllib
3
- import sys
4
- import time
5
-
6
- try:
7
- from lxml import etree
8
- except:
9
- import xml.etree.cElementTree as etree
10
-
11
- class Citem:
12
- def __init__(self,item=None):
13
- self.hits = None
14
- self.word = None
15
- self.tokens = None
16
- if item is not None:
17
- if isinstance(item,str):
18
- self.load_from_string(item)
19
- else:
20
- self.load_from_item_node(item)
21
-
22
- def load_from_string(self,line):
23
- ## Example line: 22865,"de server van"
24
- line = line.strip()
25
- pos = line.find(',')
26
- self.hits = int(line[:pos])
27
- self.word = line[pos+2:-1]
28
- self.tokens = self.word.split(' ')
29
-
30
- def load_from_item_node(self,item_node):
31
- hits_node = item_node.find('hits')
32
- if hits_node is not None:
33
- self.hits = int(hits_node.text)
34
-
35
- word_node = item_node.find('word')
36
- if word_node is not None:
37
- self.word = str(word_node.text)
38
- self.tokens = self.word.split(' ')
39
-
40
- def __str__(self):
41
- if self.word is not None and self.hits is not None:
42
- s = str(self.tokens)+' ->'+str(self.hits)+' hits'
43
- else:
44
- s = 'None'
45
- return s
46
-
47
- def __repr__(self):
48
- return self.__str__()
49
-
50
- def get_hits(self):
51
- return self.hits
52
-
53
- def get_word(self):
54
- return self.word
55
-
56
- def get_tokens(self):
57
- return self.tokens
58
-
59
-
60
- class Cgoogle_web_nl:
61
- def __init__(self):
62
- self.url='http://www.let.rug.nl/gosse/bin/Web1T5_freq.perl'
63
- self.sleep_this_time = 5 #First time to sleep in case of error
64
- self.max_trials = 20
65
- self.limit = 1000
66
- self.min_freq = 100
67
- self.items = []
68
-
69
-
70
- def set_limit(self,l):
71
- if not isinstance(l, int):
72
- print>>sys.stderr,'Parameter for set_min_freq must be an integer and not ',type(m)
73
- sys.exit(-1)
74
- self.limit = l
75
-
76
- def set_min_freq(self,m):
77
- if not isinstance(m, int):
78
- print>>sys.stderr,'Parameter for set_min_freq must be an integer and not ',type(m)
79
- sys.exit(-1)
80
- self.min_freq = m
81
-
82
- def query(self,this_query,fixed='shown'):
83
- #http://www.let.rug.nl/gosse/bin/Web1T5_freq.perl?
84
- #query=interessante%20*&
85
- #mode=XML&limit=10000&
86
- #threshold=40&optimize=on&wildcards=listed+normally
87
- #&fixed=shown&.cgifields=debug&.cgifields=optimize
88
- dict_params = {}
89
- dict_params['query'] = this_query
90
- dict_params['mode']='XML'
91
- #dict_params['mode']='csv'
92
- dict_params['limit']=self.limit
93
- dict_params['threshold']=self.min_freq
94
- dict_params['optimize']='on'
95
- dict_params['wildcards']='listed normally'
96
- dict_params['fixed']=fixed
97
- dict_params['.cgifields']='debug'
98
- dict_params['.cgifields']='optimize'
99
- params = urllib.urlencode(dict_params)
100
- #print>>sys.stderr,self.url+'?%s' % params
101
-
102
-
103
- done = False
104
- this_url = None
105
- trials = 0
106
- while not done:
107
- try:
108
- this_url = urllib2.urlopen(self.url+'?%s' % params)
109
- code = this_url.getcode()
110
- except Exception as e:
111
- code = -1
112
- print>>sys.stderr,str(e)
113
-
114
- if code == 200:
115
- done = True
116
- else:
117
- print>>sys.stderr,'Got an error (code '+str(code)+') querying google web nl, with "'+this_query+'", retrying...'
118
- print>>sys.stderr,'Trial ',trials,' waiting ',self.sleep_this_time,'seconds'
119
- time.sleep(self.sleep_this_time)
120
- trials += 1
121
- self.sleep_this_time += 1
122
- if trials == self.max_trials:
123
- print>>sys.stderr,'Maximum number of trials reached. Giving up...'
124
- done = True
125
- this_url = None
126
-
127
- if this_url is not None:
128
- if dict_params['mode'] == 'XML':
129
- xml_obj = etree.parse(this_url)
130
- this_url.close()
131
-
132
- for item_node in xml_obj.findall('item'):
133
- self.items.append(Citem(item_node))
134
- del xml_obj
135
- else: #CSV
136
- first_line = True
137
- ## The first line is frequency,"N-gram"
138
- for line in this_url:
139
- if not first_line:
140
- self.items.append(Citem(line))
141
- first_line = False
142
-
143
-
144
-
145
- def get_items(self):
146
- for item in self.items:
147
- yield item
148
-
149
- def get_all_items(self):
150
- return self.items
151
-
152
- def len(self):
153
- return len(self.items)
154
- def __iter__(self):
155
- for item in self.items:
156
- yield item
@@ -1 +0,0 @@
1
- from feature_file import *
@@ -1,121 +0,0 @@
1
- from operator import itemgetter
2
- import sys
3
- import cPickle
4
-
5
-
6
-
7
- class Cexample:
8
- def __init__(self,str_line=None):
9
- self.label = ''
10
- self.features = []
11
- if str_line is not None:
12
- self.load_from_line(str_line)
13
-
14
- def load_from_line(self,str_line):
15
- fields = str_line.strip().split('\t')
16
- self.label = fields[0]
17
- for feat in fields[1:]:
18
- first_equal = feat.find('=')
19
- if first_equal != -1:
20
- name = feat[:first_equal]
21
- value = feat[first_equal+1:]
22
- self.features.append((name,value))
23
-
24
- def __str__(self):
25
- s = 'Label: '+self.label+'\n'
26
- s += 'Feats: '+str(self.features)
27
- return s
28
-
29
- def get_label(self):
30
- return self.label
31
-
32
- def get_features(self):
33
- for name,value in self.features:
34
- yield name,value
35
-
36
- def get_all_features(self):
37
- return self.features
38
-
39
-
40
- class Cfeature_index:
41
- def __init__(self):
42
- self.idx = {}
43
-
44
- def get_number_feat(self,feat):
45
- return self.idx.get(feat,None)
46
-
47
- def add_feat(self,feat):
48
- num_feat = len(self.idx)+1
49
- self.idx[feat] = num_feat
50
- return num_feat
51
-
52
-
53
- def compose_feat(self,name,value):
54
- return name+'###'+value
55
-
56
-
57
- def __encode_features(self,feats,modify_index=True):
58
- feats_for_example = {}
59
- clean_feats = ''
60
- for name, value in feats:
61
- my_feat = self.compose_feat(name, value)
62
- clean_feats+=my_feat+' '
63
- num_feat = self.get_number_feat(my_feat)
64
- if num_feat is None:
65
- if modify_index:
66
- num_feat = self.add_feat(my_feat)
67
-
68
- if num_feat is not None:
69
- if num_feat in feats_for_example:
70
- feats_for_example[num_feat] += 1
71
- else:
72
- feats_for_example[num_feat] = 1
73
- return sorted(feats_for_example.items(),key=itemgetter(0)),clean_feats
74
-
75
-
76
- def encode_feature_file_to_svm(self,feat_file_obj,out_fic=sys.stdout):
77
- for example in feat_file_obj:
78
- class_label = example.get_label()
79
- out_fic.write(class_label)
80
- feats_for_example, clean_feats =self.__encode_features(example.get_all_features())
81
-
82
- for feat,freq_feat in feats_for_example:
83
- value = freq_feat
84
- out_fic.write(' %d:%d' % (feat,value))
85
- out_fic.write(' #'+clean_feats.encode('utf-8')+'\n')
86
-
87
- def encode_example_for_classification(self, feats,out_fic,my_class='0'):
88
- feats_for_example, clean_feats =self.__encode_features(feats,modify_index=False)
89
- out_fic.write(my_class)
90
- for feat,freq_feat in feats_for_example:
91
- value = freq_feat
92
- out_fic.write(' %d:%d' % (feat,value))
93
- out_fic.write(' #'+clean_feats.encode('utf-8')+'\n')
94
-
95
- def save_to_file(self,filename):
96
- fic = open(filename,'wb')
97
- cPickle.dump(self.idx, fic, protocol=0)
98
- fic.close()
99
-
100
- def load_from_file(self,filename):
101
- fic = open(filename,'rb')
102
- self.idx = cPickle.load(fic)
103
- fic.close()
104
-
105
-
106
-
107
- class Cfeature_file:
108
- def __init__(self,filename=None):
109
- self.filename = filename
110
-
111
- def __iter__(self):
112
- if self.filename is not None:
113
- fic = open(self.filename,'r')
114
- for line in fic:
115
- if line[0] != '#':
116
- yield Cexample(line.decode('utf-8'))
117
- fic.close()
118
-
119
-
120
-
121
-