opener-opinion-detector-base 2.0.1 → 2.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/core/python-scripts/README.md +78 -3
  3. data/core/python-scripts/classify_kaf_naf_file.py +94 -94
  4. data/core/python-scripts/models.cfg +1 -0
  5. data/core/python-scripts/scripts/config_manager.py +3 -0
  6. data/core/python-scripts/scripts/extract_features.py +0 -3
  7. data/core/python-scripts/scripts/relation_classifier.py +1 -1
  8. data/core/vendor/src/crfsuite/crfsuite.sln +42 -42
  9. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -26
  10. data/ext/hack/Rakefile +5 -2
  11. data/lib/opener/opinion_detectors/base.rb +19 -15
  12. data/lib/opener/opinion_detectors/base/version.rb +1 -1
  13. data/lib/opener/opinion_detectors/configuration_creator.rb +6 -8
  14. data/lib/opener/opinion_detectors/de.rb +1 -1
  15. data/lib/opener/opinion_detectors/es.rb +7 -0
  16. data/lib/opener/opinion_detectors/fr.rb +7 -0
  17. data/opener-opinion-detector-base.gemspec +0 -1
  18. data/pre_install_requirements.txt +3 -0
  19. metadata +41 -85
  20. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  21. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  22. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +0 -10
  23. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +0 -22
  24. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +0 -1
  25. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +0 -47
  26. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +0 -1
  27. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +0 -390
  28. data/core/site-packages/pre_build/KafNafParser/__init__.py +0 -14
  29. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +0 -125
  30. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +0 -52
  31. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +0 -78
  32. data/core/site-packages/pre_build/KafNafParser/entity_data.py +0 -59
  33. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +0 -41
  34. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +0 -2
  35. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +0 -205
  36. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +0 -309
  37. data/core/site-packages/pre_build/KafNafParser/features_data.py +0 -131
  38. data/core/site-packages/pre_build/KafNafParser/header_data.py +0 -127
  39. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +0 -211
  40. data/core/site-packages/pre_build/KafNafParser/references_data.py +0 -23
  41. data/core/site-packages/pre_build/KafNafParser/span_data.py +0 -63
  42. data/core/site-packages/pre_build/KafNafParser/term_data.py +0 -111
  43. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +0 -42
  44. data/core/site-packages/pre_build/KafNafParser/text_data.py +0 -99
  45. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +0 -10
  46. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +0 -14
  47. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +0 -1
  48. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +0 -23
  49. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +0 -1
  50. data/core/site-packages/pre_build/VUA_pylib/__init__.py +0 -1
  51. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +0 -1
  52. data/core/site-packages/pre_build/VUA_pylib/common/common.py +0 -28
  53. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +0 -1
  54. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +0 -156
  55. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +0 -1
  56. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +0 -121
  57. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +0 -1
  58. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +0 -72
  59. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +0 -10
  60. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +0 -7
  61. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +0 -1
  62. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +0 -11
  63. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +0 -1
  64. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +0 -165
  65. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +0 -439
  66. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +0 -7
  67. data/pre_build_requirements.txt +0 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9c8aef27fcd7c10ed7176b0a73bec253436c9efb
4
- data.tar.gz: 161e62461eded780c02c261e4f980f78ba0577e6
3
+ metadata.gz: 8cab19f98d9ee9c6ae4938a3be0cebb666126e44
4
+ data.tar.gz: d83ded3deb19fe5b7cead1ba079cb6bf585c9593
5
5
  SHA512:
6
- metadata.gz: 5723e548b534b9a743646de5e08adb3d59715b77bd931f381082ca3ee8924db9ed433f7655af6a9d10cef402350c9f60c4ef89afec5c2b87c90e6504a5cdb01d
7
- data.tar.gz: 3f9e5386c7fb1400d4204c232e1498b44af8958ef576cd51441b25f707f65cc331785a3b6bff86b8e36f0e854feef817b270cd0d76727eb4a95a7346d40ca0dc
6
+ metadata.gz: bb3d6a9b3d9d6fd3a3fa496b6a2de948398d250e49540dd89bd24d1efe98e9837e7bcb5dfef3b973c5fb069f4425dca2cf2c01d66bfdb148884d8af20a0c8792
7
+ data.tar.gz: ac1f7d71ec3160f279b0e2c8954ea1ee5bf29f56630d7e249a16c46c3498a091a511df97f89606d1bbaece012e011aff0bdd30bd50b84643e0993d8f9598f68a
@@ -2,14 +2,13 @@
2
2
 
3
3
  ##Introduction##
4
4
 
5
-
6
5
  Opinion miner based on machine learning that can be trained using a list of
7
6
  KAF/NAF files. It is important to notice that the opinion miner module will not call
8
7
  to any external module to obtain features. It will read all the features from the input KAF/NAF file,
9
8
  so you have to make sure that your input file contains all the required information in advance (tokens,
10
- terms, polarities, constituents, entitiess, dependencies...)
9
+ terms, polarities, constituents, entities, dependencies...).
11
10
 
12
- The task is divided into 2 steps
11
+ The task is generally divided into 2 steps
13
12
  * Detection of opinion entities (holder, target and expression): using
14
13
  Conditional Random Fields
15
14
  * Opinion entity linking (expression<-target and expression-<holder): using
@@ -79,6 +78,82 @@ of CRFsuite and SVMLight. This file will be passed to the main script to detect
79
78
  cat my_file.kaf | classify_kaf_naf_file.py your_config_file.cfg
80
79
  ````
81
80
 
81
+ There are two basic functionalities:
82
+
83
+ * Training: from a corpus of opinion annotated files, induce and learn the models for detecting opinions
84
+ * Classification: using the previous models, find and extract opinions in new text files.
85
+
86
+ We provide models already trained and evaluated on hotel, news, attractions and restaurants domains for all the languages covered
87
+ by the OpeNER project. Most of the users will just focus on this classification step, using the models that we provide. Some others
88
+ will need to retrain the system to adapt it to a new domain or language. In the next sections we will introduce these 2 different
89
+ usages of the opinion miner deluxe.
90
+
91
+ ##Classification##
92
+
93
+ In this case you have the models already trained (either you trained them yourself or got the ones we provide) and you want just to detect
94
+ the opinions in a new file. The input format of your file needs to be valid KAF format. The script that performs the classification is the script
95
+ `classify_kaf_naf_file.py`. You can get information about the available parameters by running the script with the parameter -h.
96
+ ```shell
97
+ classify_kaf_naf_file.py -h
98
+ usage: classify_kaf_naf_file.py [-h]
99
+ (-m MODEL_FOLDER | -d DOMAIN | -show-models)
100
+ [-keep-opinions] [-no-time]
101
+
102
+ Detect opinion triples in a KAF/NAF file
103
+
104
+ optional arguments:
105
+ -h, --help show this help message and exit
106
+ -m MODEL_FOLDER Folder storing the trained models
107
+ -d DOMAIN The domain where the models were trained
108
+ -show-models Show the models available and finish
109
+ -keep-opinions Keep the opinions from the input (by default will be deleted)
110
+ -no-time No include time in timestamp (for testing)
111
+ ```
112
+
113
+ The script reads the input KAF file from the standard input and will write the output KAF into the standard output. The main parameter is the model that
114
+ will be used. There are two ways of specifying this parameter:
115
+ * By using the -m FOLDER option, by means of which we can specify that we would like to use exactly the folder stored in the path FOLDER
116
+ * By using the -d DOMAIN option, where DOMAIN is the domain where the model that we want to use was trained.
117
+
118
+ We can get which are the models available by running:
119
+ ```shell
120
+ classify_kaf_naf_file.py -show-models
121
+ #########################
122
+ Models available
123
+ #########################
124
+ Model 0
125
+ Lang: en
126
+ Domain: hotel
127
+ Folder: final_models/en/hotel_cfg1
128
+ Desc: Trained with config1 in the last version of hotel annotations
129
+ Model 1
130
+ Lang: en
131
+ Domain: news
132
+ Folder: final_models/en/news_cfg1
133
+ Desc: Trained with config1 using only the sentences annotated with news
134
+ ....
135
+ ....
136
+ ```
137
+
138
+ You can train and use as many models as you want. You will need the file `models.cfg` which contains the metadata about which models
139
+ are available and how to refer to them (the domain). This is an example of the content of this file:
140
+ ```shell
141
+ #LANG|domain|pathtomodel|description
142
+ en|hotel|final_models/en/hotel_cfg1|Trained with config1 in the last version of hotel annotations
143
+ en|news|final_models/en/news_cfg1|Trained with config1 using only the sentences annotated with news
144
+ nl|hotel|final_models/nl/hotel_cfg1|Trained with config1 in the last version of hotel annotations
145
+ nl|news|final_models/nl/news_cfg1|Trained with config1 using only the sentences annotated with news
146
+ ```
147
+ So in each line a model is specified and represented using 4 fields, the language, the domain identifier (which will be used later to refer to this model),
148
+ the path to the folder and a text with a description. The language for the KAF file will be read directly from the KAF header, and considering this model
149
+ and the domain id provided to the script, the proper model will be loaded and used.
150
+
151
+ So if you want to tag a file with Dutch text called input.nl.kaf with the models trained on hotel reviews, and store the result on the file output.nl.kaf you just
152
+ should call the program as:
153
+ ```shell
154
+ cat input.nl.kaf | python classify_kaf_naf_file.py -d hotel > output.nl.kaf
155
+ ```
156
+
82
157
  ##Training your own models##
83
158
 
84
159
  You will need first to install all the requirements given and then follow these steps:
@@ -8,7 +8,7 @@ this_folder = os.path.dirname(os.path.realpath(__file__))
8
8
 
9
9
  # This updates the load path to ensure that the local site-packages directory
10
10
  # can be used to load packages (e.g. a locally installed copy of lxml).
11
- sys.path.append(os.path.join(this_folder, '../site-packages/pre_build'))
11
+ sys.path.append(os.path.join(this_folder, '../site-packages/pre_install'))
12
12
 
13
13
  import csv
14
14
  from tempfile import NamedTemporaryFile
@@ -20,17 +20,16 @@ import argparse
20
20
  from scripts import lexicons as lexicons_manager
21
21
  from scripts.config_manager import Cconfig_manager
22
22
  from scripts.extract_features import extract_features_from_kaf_naf_file
23
- from scripts.crfutils import extract_features_to_crf
23
+ from scripts.crfutils import extract_features_to_crf
24
24
  from scripts.link_entities_distance import link_entities_distance
25
25
  from scripts.relation_classifier import link_entities_svm
26
- from KafNafParser import *
27
- from VUA_pylib import *
26
+ from KafNafParserPy import *
28
27
 
29
28
 
30
29
  DEBUG=0
31
30
 
32
31
  my_config_manager = Cconfig_manager()
33
- __this_folder = os.getcwd()
32
+ __this_folder = os.path.dirname(os.path.realpath(__file__))
34
33
  separator = '\t'
35
34
  __desc = 'Deluxe opinion miner (CRF+SVM)'
36
35
  __last_edited = '10jan2014'
@@ -59,7 +58,7 @@ def match_crfsuite_out(crfout,list_token_ids):
59
58
  if inside:
60
59
  matches.append((current,current_type))
61
60
  current = []
62
- inside = False
61
+ inside = False
63
62
  else:
64
63
  if line=='O':
65
64
  if inside:
@@ -73,8 +72,8 @@ def match_crfsuite_out(crfout,list_token_ids):
73
72
  if inside:
74
73
  matches.append((current,current_type))
75
74
  current = [list_token_ids[num_token]]
76
- inside = True
77
- current_type = value
75
+ inside = True
76
+ current_type = value
78
77
  elif my_type == 'I':
79
78
  if inside:
80
79
  current.append(list_token_ids[num_token])
@@ -92,42 +91,42 @@ def match_crfsuite_out(crfout,list_token_ids):
92
91
  def extract_features(kaf_naf_obj):
93
92
  feat_file_desc = NamedTemporaryFile(delete=False)
94
93
  feat_file_desc.close()
95
-
94
+
96
95
  out_file = feat_file_desc.name
97
96
  err_file = out_file+'.log'
98
-
97
+
99
98
  expressions_lexicon = None
100
99
  targets_lexicon = None
101
100
  if my_config_manager.get_use_training_lexicons():
102
101
  expression_lexicon_filename = my_config_manager.get_expression_lexicon_filename()
103
102
  target_lexicon_filename = my_config_manager.get_target_lexicon_filename()
104
-
103
+
105
104
  expressions_lexicon = lexicons_manager.load_lexicon(expression_lexicon_filename)
106
105
  targets_lexicon =lexicons_manager.load_lexicon(target_lexicon_filename)
107
106
 
108
107
  #def extract_features_from_kaf_naf_file(knaf_obj,out_file=None,log_file=None,include_class=True,accepted_opinions=None, exp_lex= None):
109
108
  labels, separator,polarities_skipped = extract_features_from_kaf_naf_file(kaf_naf_obj,out_file,err_file,include_class=False, exp_lex=expressions_lexicon,tar_lex=targets_lexicon)
110
109
  return out_file, err_file
111
-
112
-
110
+
111
+
113
112
  def convert_to_crf(input_file,templates):
114
113
  out_desc = NamedTemporaryFile(delete=False)
115
114
  out_desc.close()
116
-
115
+
117
116
  out_crf = out_desc.name
118
-
117
+
119
118
  ##Load description of features
120
119
  path_feat_desc = my_config_manager.get_feature_desc_filename()
121
120
  fic = open(path_feat_desc)
122
121
  fields = fic.read().strip()
123
122
  fic.close()
124
123
  ####
125
-
124
+
126
125
  extract_features_to_crf(input_file,out_crf,fields,separator,templates,possible_classes=None)
127
126
  return out_crf
128
-
129
-
130
-
127
+
128
+
129
+
131
130
  def run_crfsuite_tag(input_file,model_file):
132
131
  crfsuite = my_config_manager.get_crfsuite_binary()
133
132
  cmd = [crfsuite]
@@ -150,8 +149,8 @@ def run_crfsuite_tag(input_file,model_file):
150
149
 
151
150
  def detect_expressions(tab_feat_file,list_token_ids):
152
151
  #1) Convert to the correct CRF
153
- templates = my_config_manager.get_templates_expr()
154
-
152
+ templates = my_config_manager.get_templates_expr()
153
+
155
154
  crf_exp_file = convert_to_crf(tab_feat_file,templates)
156
155
  logging.debug('File with crf format for EXPRESSIONS '+crf_exp_file)
157
156
  if DEBUG:
@@ -161,10 +160,10 @@ def detect_expressions(tab_feat_file,list_token_ids):
161
160
  print>>sys.stderr,f.read()
162
161
  f.close()
163
162
  print>>sys.stderr,'#'*50
164
-
163
+
165
164
  model_file = my_config_manager.get_filename_model_expression()
166
165
  output_crf,error_crf = run_crfsuite_tag(crf_exp_file,model_file)
167
-
166
+
168
167
  logging.debug('Expressions crf error: '+error_crf)
169
168
  matches_exp = match_crfsuite_out(output_crf, list_token_ids)
170
169
  if DEBUG:
@@ -175,19 +174,19 @@ def detect_expressions(tab_feat_file,list_token_ids):
175
174
  print>>sys.stderr,'MATCHES:',str(matches_exp)
176
175
  print>>sys.stderr,'TEMP FILE:',crf_exp_file
177
176
  print>>sys.stderr,'#'*50
178
-
179
-
177
+
178
+
180
179
  logging.debug('Detector expressions out: '+str(matches_exp))
181
180
  os.remove(crf_exp_file)
182
181
  return matches_exp
183
-
184
-
185
-
186
-
187
-
182
+
183
+
184
+
185
+
186
+
188
187
  def detect_targets(tab_feat_file, list_token_ids):
189
188
  templates_target = my_config_manager.get_templates_target()
190
-
189
+
191
190
  crf_target_file = convert_to_crf(tab_feat_file,templates_target)
192
191
  logging.debug('File with crf format for TARGETS '+crf_target_file)
193
192
  if DEBUG:
@@ -197,13 +196,13 @@ def detect_targets(tab_feat_file, list_token_ids):
197
196
  print>>sys.stderr,f.read()
198
197
  f.close()
199
198
  print>>sys.stderr,'#'*50
200
-
199
+
201
200
  model_target_file = my_config_manager.get_filename_model_target()
202
201
  out_crf_target,error_crf = run_crfsuite_tag(crf_target_file, model_target_file)
203
202
  logging.debug('TARGETS crf error: '+error_crf)
204
203
 
205
204
  matches_tar = match_crfsuite_out(out_crf_target, list_token_ids)
206
-
205
+
207
206
  if DEBUG:
208
207
  print>>sys.stderr,'#'*50
209
208
  print>>sys.stderr,'CRF output for TARGETS'
@@ -211,18 +210,18 @@ def detect_targets(tab_feat_file, list_token_ids):
211
210
  print>>sys.stderr,'List token ids:',str(list_token_ids)
212
211
  print>>sys.stderr,'MATCHES:',str(matches_tar)
213
212
  print>>sys.stderr,'#'*50
214
-
213
+
215
214
  logging.debug('Detector targets out: '+str(matches_tar))
216
215
  os.remove(crf_target_file)
217
216
  return matches_tar
218
-
219
-
220
-
221
-
222
-
217
+
218
+
219
+
220
+
221
+
223
222
  def detect_holders(tab_feat_file, list_token_ids):
224
223
  templates_holder = my_config_manager.get_templates_holder()
225
-
224
+
226
225
  crf_holder_file = convert_to_crf(tab_feat_file,templates_holder)
227
226
  logging.debug('File with crf format for HOLDERS '+crf_holder_file)
228
227
  if DEBUG:
@@ -232,7 +231,7 @@ def detect_holders(tab_feat_file, list_token_ids):
232
231
  print>>sys.stderr,f.read()
233
232
  f.close()
234
233
  print>>sys.stderr,'#'*50
235
-
234
+
236
235
  model_holder_file = my_config_manager.get_filename_model_holder()
237
236
  out_crf_holder,error_crf = run_crfsuite_tag(crf_holder_file, model_holder_file)
238
237
  logging.debug('HOLDERS crf error: '+error_crf)
@@ -246,12 +245,12 @@ def detect_holders(tab_feat_file, list_token_ids):
246
245
  print>>sys.stderr,'List token ids:',str(list_token_ids)
247
246
  print>>sys.stderr,'MATCHES:',str(matches_holder)
248
247
  print>>sys.stderr,'#'*50
249
-
248
+
250
249
  logging.debug('Detector HOLDERS out: '+str(matches_holder))
251
250
  os.remove(crf_holder_file)
252
251
  return matches_holder
253
-
254
-
252
+
253
+
255
254
 
256
255
 
257
256
 
@@ -267,19 +266,19 @@ def map_tokens_to_terms(list_tokens,knaf_obj):
267
266
  terms_for_token[tokid] = [termid]
268
267
  else:
269
268
  terms_for_token[tokid].append(termid)
270
-
269
+
271
270
  ret = set()
272
271
  for my_id in list_tokens:
273
272
  term_ids = terms_for_token[my_id]
274
273
  ret |= set(term_ids)
275
274
  return sorted(list(ret))
276
-
277
-
278
-
275
+
276
+
277
+
279
278
  def add_opinions_to_knaf(triples,knaf_obj,text_for_tid,ids_used, map_to_terms=True,include_polarity_strength=True):
280
279
  num_opinion = 0
281
280
  for type_exp, span_exp, span_tar, span_hol in triples:
282
- #Map tokens to terms
281
+ #Map tokens to terms
283
282
  if map_to_terms:
284
283
  span_exp_terms = map_tokens_to_terms(span_exp,kaf_obj)
285
284
  span_tar_terms = map_tokens_to_terms(span_tar,kaf_obj)
@@ -288,16 +287,16 @@ def add_opinions_to_knaf(triples,knaf_obj,text_for_tid,ids_used, map_to_terms=Tr
288
287
  span_hol_terms = span_hol
289
288
  span_tar_terms = span_tar
290
289
  span_exp_terms = span_exp
291
-
290
+
292
291
  ##Creating holder
293
292
  span_hol = Cspan()
294
293
  span_hol.create_from_ids(span_hol_terms)
295
294
  my_hol = Cholder()
296
295
  my_hol.set_span(span_hol)
297
-
296
+
298
297
  hol_text = ' '.join(text_for_tid[tid] for tid in span_hol_terms)
299
298
  my_hol.set_comment(hol_text)
300
-
299
+
301
300
  #Creating target
302
301
  span_tar = Cspan()
303
302
  span_tar.create_from_ids(span_tar_terms)
@@ -318,7 +317,7 @@ def add_opinions_to_knaf(triples,knaf_obj,text_for_tid,ids_used, map_to_terms=Tr
318
317
  exp_text = ' '.join(text_for_tid[tid] for tid in span_exp_terms)
319
318
  my_exp.set_comment(exp_text)
320
319
  #########################
321
-
320
+
322
321
  #To get the first possible ID not already used
323
322
  new_id = None
324
323
  while True:
@@ -332,32 +331,33 @@ def add_opinions_to_knaf(triples,knaf_obj,text_for_tid,ids_used, map_to_terms=Tr
332
331
  new_opinion.set_id(new_id)
333
332
  if len(span_hol_terms) != 0: #To avoid empty holders
334
333
  new_opinion.set_holder(my_hol)
335
-
334
+
336
335
  if len(span_tar_terms) != 0: #To avoid empty targets
337
336
  new_opinion.set_target(my_tar)
338
-
337
+
339
338
  new_opinion.set_expression(my_exp)
340
-
339
+
341
340
  knaf_obj.add_opinion(new_opinion)
342
-
341
+
343
342
  ##
344
343
  # input_file_stream can be a filename or a stream
345
344
  # output_file_stream can be a filename or a stream
346
345
  #Config file must be a string filename
347
346
  def tag_file_with_opinions(input_file_stream, output_file_stream,model_folder,kaf_obj=None, remove_existing_opinions=True,include_polarity_strength=True,timestamp=True):
347
+
348
348
  config_filename = os.path.join(model_folder)
349
349
  if not os.path.exists(config_filename):
350
350
  print>>sys.stderr,'Config file not found on:',config_filename
351
351
  sys.exit(-1)
352
-
352
+
353
353
  my_config_manager.set_current_folder(__this_folder)
354
354
  my_config_manager.set_config(config_filename)
355
-
355
+
356
356
  if kaf_obj is not None:
357
357
  knaf_obj = kaf_obj
358
358
  else:
359
359
  knaf_obj = KafNafParser(input_file_stream)
360
-
360
+
361
361
  #Create a temporary file
362
362
  out_feat_file, err_feat_file = extract_features(knaf_obj)
363
363
  if DEBUG:
@@ -367,7 +367,7 @@ def tag_file_with_opinions(input_file_stream, output_file_stream,model_folder,ka
367
367
  print>>sys.stderr,f.read()
368
368
  f.close()
369
369
  print>>sys.stderr,'#'*50
370
-
370
+
371
371
  #get all the tokens in order
372
372
  list_token_ids = []
373
373
  text_for_wid = {}
@@ -378,67 +378,67 @@ def tag_file_with_opinions(input_file_stream, output_file_stream,model_folder,ka
378
378
  s_id = token_obj.get_sent()
379
379
  w_id = token_obj.get_id()
380
380
  text_for_wid[w_id] = token
381
-
381
+
382
382
  list_token_ids.append(w_id)
383
383
  sentence_for_token[w_id] = s_id
384
-
384
+
385
385
  for term in knaf_obj.get_terms():
386
386
  tid = term.get_id()
387
387
  toks = [text_for_wid.get(wid,'') for wid in term.get_span().get_span_ids()]
388
388
  text_for_tid[tid] = ' '.join(toks)
389
389
 
390
-
390
+
391
391
  expressions = detect_expressions(out_feat_file,list_token_ids)
392
392
  targets = detect_targets(out_feat_file, list_token_ids)
393
393
  holders = detect_holders(out_feat_file, list_token_ids)
394
-
394
+
395
395
  os.remove(out_feat_file)
396
396
  os.remove(err_feat_file)
397
397
 
398
398
  if DEBUG:
399
399
  print>>sys.stderr,"Expressions detected:"
400
400
  for e in expressions:
401
- print>>sys.stderr,'\t',e, ' '.join([text_for_wid[wid] for wid in e[0] ])
401
+ print>>sys.stderr,'\t',e, ' '.join([text_for_wid[wid] for wid in e[0] ])
402
402
  print>>sys.stderr
403
-
403
+
404
404
  print>>sys.stderr,'Targets detected'
405
405
  for t in targets:
406
- print>>sys.stderr,'\t',t, ' '.join([text_for_wid[wid] for wid in t[0] ])
406
+ print>>sys.stderr,'\t',t, ' '.join([text_for_wid[wid] for wid in t[0] ])
407
407
  print>>sys.stderr
408
-
408
+
409
409
  print>>sys.stderr,'Holders',holders
410
410
  for h in holders:
411
- print>>sys.stderr,'\t',h, ' '.join([text_for_wid[wid] for wid in h[0] ])
411
+ print>>sys.stderr,'\t',h, ' '.join([text_for_wid[wid] for wid in h[0] ])
412
412
  print>>sys.stderr
413
-
414
-
413
+
414
+
415
415
  # Entity linker based on distances
416
416
  ####triples = link_entities_distance(expressions,targets,holders,sentence_for_token)
417
-
417
+
418
418
  triples = link_entities_svm(expressions, targets, holders, knaf_obj, my_config_manager)
419
-
419
+
420
420
  ids_used = set()
421
421
  if remove_existing_opinions:
422
422
  knaf_obj.remove_opinion_layer()
423
423
  else:
424
424
  for opi in knaf_obj.get_opinions():
425
425
  ids_used.add(opi.get_id())
426
-
427
-
428
- add_opinions_to_knaf(triples, knaf_obj,text_for_tid,ids_used, map_to_terms=False,include_polarity_strength=include_polarity_strength)
429
-
426
+
427
+
428
+ add_opinions_to_knaf(triples, knaf_obj,text_for_tid,ids_used, map_to_terms=False,include_polarity_strength=include_polarity_strength)
429
+
430
430
  #Adding linguistic processor
431
431
  my_lp = Clp()
432
432
  my_lp.set_name(__desc)
433
433
  my_lp.set_version(__last_edited+'_'+__version)
434
434
  if timestamp:
435
- my_lp.set_timestamp() ##Set to the current date and time
435
+ my_lp.set_timestamp() ##Set to the current date and time
436
436
  else:
437
- my_lp.set_timestamp('*')
437
+ my_lp.set_timestamp('*')
438
438
  knaf_obj.add_linguistic_processor('opinions',my_lp)
439
439
  knaf_obj.dump(output_file_stream)
440
-
441
-
440
+
441
+
442
442
 
443
443
  def obtain_predefined_model(lang,domain,just_show=False):
444
444
  #This function will read the models from the file models.cfg and will return
@@ -451,7 +451,7 @@ def obtain_predefined_model(lang,domain,just_show=False):
451
451
  print '#'*25
452
452
  print 'Models available'
453
453
  print '#'*25
454
-
454
+
455
455
  nm = 0
456
456
  for line in fic:
457
457
  if line[0]!='#':
@@ -471,15 +471,15 @@ def obtain_predefined_model(lang,domain,just_show=False):
471
471
  if just_show:
472
472
  print '#'*25
473
473
  return use_this_model
474
-
474
+
475
475
  if __name__ == '__main__':
476
-
476
+
477
477
  argument_parser = argparse.ArgumentParser(description='Detect opinion triples in a KAF/NAF file')
478
478
  group = argument_parser.add_mutually_exclusive_group(required=True)
479
479
  group.add_argument('-m',dest='model_folder',help='Folder storing the trained models')
480
480
  group.add_argument('-d', dest='domain',help='The domain where the models were trained')
481
481
  group.add_argument('-show-models', dest='show_models', action='store_true',help='Show the models available and finish')
482
-
482
+
483
483
  argument_parser.add_argument('-keep-opinions',dest='keep_opinions',action='store_true',help='Keep the opinions from the input (by default will be deleted)')
484
484
  argument_parser.add_argument('-no-time',dest='timestamp',action='store_false',help='No include time in timestamp (for testing)')
485
485
  arguments = argument_parser.parse_args()
@@ -487,7 +487,7 @@ if __name__ == '__main__':
487
487
  if arguments.show_models:
488
488
  obtain_predefined_model(None,None,just_show=True)
489
489
  sys.exit(0)
490
-
490
+
491
491
  knaf_obj = KafNafParser(sys.stdin)
492
492
  model_folder = None
493
493
  if arguments.model_folder is not None:
@@ -496,12 +496,12 @@ if __name__ == '__main__':
496
496
  #Obtain the language
497
497
  lang = knaf_obj.get_language()
498
498
  model_folder = obtain_predefined_model(lang,arguments.domain)
499
-
500
-
499
+
500
+
501
501
  tag_file_with_opinions(None, sys.stdout,model_folder,kaf_obj=knaf_obj,remove_existing_opinions=(not arguments.keep_opinions),timestamp=arguments.timestamp)
502
502
  sys.exit(0)
503
-
504
-
505
-
506
-
507
-
503
+
504
+
505
+
506
+
507
+