abstractor 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346) hide show
  1. checksums.yaml +15 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +14 -0
  4. data/Rakefile +33 -0
  5. data/app/assets/images/abstractor/add.png +0 -0
  6. data/app/assets/images/abstractor/ajax-loader.gif +0 -0
  7. data/app/assets/images/abstractor/bar.gif +0 -0
  8. data/app/assets/images/abstractor/bti_logo.jpg +0 -0
  9. data/app/assets/images/abstractor/bullet_arrow_down.png +0 -0
  10. data/app/assets/images/abstractor/bullet_arrow_up.png +0 -0
  11. data/app/assets/images/abstractor/cog.png +0 -0
  12. data/app/assets/images/abstractor/delete.png +0 -0
  13. data/app/assets/images/abstractor/edit.png +0 -0
  14. data/app/assets/images/abstractor/excel.png +0 -0
  15. data/app/assets/images/abstractor/favicon.ico +0 -0
  16. data/app/assets/images/abstractor/greencheck.gif +0 -0
  17. data/app/assets/images/abstractor/loading.gif +0 -0
  18. data/app/assets/images/abstractor/nu_logo.jpg +0 -0
  19. data/app/assets/images/abstractor/nubic_logo.png +0 -0
  20. data/app/assets/images/abstractor/page.png +0 -0
  21. data/app/assets/images/abstractor/rails.png +0 -0
  22. data/app/assets/images/abstractor/robert_h_lurie.jpg +0 -0
  23. data/app/assets/images/abstractor/show.png +0 -0
  24. data/app/assets/images/abstractor/switch_minus.gif +0 -0
  25. data/app/assets/images/abstractor/switch_plus.gif +0 -0
  26. data/app/assets/javascripts/abstractor/abstractor.js +89 -0
  27. data/app/assets/javascripts/abstractor/application.js +21 -0
  28. data/app/assets/javascripts/abstractor/combobox.js +301 -0
  29. data/app/assets/javascripts/abstractor/jquery/jquery.form.js +1074 -0
  30. data/app/assets/javascripts/abstractor/nested_attributes.js +69 -0
  31. data/app/assets/stylesheets/abstractor/abstractor_abstractions.css +312 -0
  32. data/app/assets/stylesheets/abstractor/application.css +21 -0
  33. data/app/assets/stylesheets/abstractor/jquery.ui.override.css.scss +73 -0
  34. data/app/controllers/abstractor/abstractor_abstraction_groups_controller.rb +5 -0
  35. data/app/controllers/abstractor/abstractor_abstractions_controller.rb +5 -0
  36. data/app/controllers/abstractor/abstractor_suggestions_controller.rb +5 -0
  37. data/app/controllers/abstractor/application_controller.rb +2 -0
  38. data/app/helpers/abstractor/application_helper.rb +4 -0
  39. data/app/models/abstractor/abstractor_abstraction.rb +6 -0
  40. data/app/models/abstractor/abstractor_abstraction_group.rb +6 -0
  41. data/app/models/abstractor/abstractor_abstraction_group_member.rb +6 -0
  42. data/app/models/abstractor/abstractor_abstraction_schema.rb +6 -0
  43. data/app/models/abstractor/abstractor_abstraction_schema_object_value.rb +6 -0
  44. data/app/models/abstractor/abstractor_abstraction_schema_predicate_variant.rb +6 -0
  45. data/app/models/abstractor/abstractor_abstraction_schema_relation.rb +6 -0
  46. data/app/models/abstractor/abstractor_abstraction_source.rb +6 -0
  47. data/app/models/abstractor/abstractor_object_type.rb +6 -0
  48. data/app/models/abstractor/abstractor_object_value.rb +6 -0
  49. data/app/models/abstractor/abstractor_object_value_variant.rb +6 -0
  50. data/app/models/abstractor/abstractor_relation_type.rb +6 -0
  51. data/app/models/abstractor/abstractor_rule_type.rb +6 -0
  52. data/app/models/abstractor/abstractor_subject.rb +6 -0
  53. data/app/models/abstractor/abstractor_subject_group.rb +6 -0
  54. data/app/models/abstractor/abstractor_subject_group_member.rb +6 -0
  55. data/app/models/abstractor/abstractor_subject_relation.rb +6 -0
  56. data/app/models/abstractor/abstractor_suggestion.rb +6 -0
  57. data/app/models/abstractor/abstractor_suggestion_object_value.rb +6 -0
  58. data/app/models/abstractor/abstractor_suggestion_source.rb +6 -0
  59. data/app/models/abstractor/abstractor_suggestion_status.rb +6 -0
  60. data/app/views/abstractor/abstractor_abstraction_groups/_form.html.haml +10 -0
  61. data/app/views/abstractor/abstractor_abstraction_groups/edit.html.haml +1 -0
  62. data/app/views/abstractor/abstractor_abstractions/_fields.html.haml +63 -0
  63. data/app/views/abstractor/abstractor_abstractions/_list.html.haml +45 -0
  64. data/app/views/abstractor/abstractor_abstractions/edit.html.haml +53 -0
  65. data/app/views/abstractor/abstractor_abstractions/show.html.haml +1 -0
  66. data/app/views/abstractor/shared/_error_messages.html.haml +5 -0
  67. data/config/cucumber.yml +8 -0
  68. data/config/routes.rb +7 -0
  69. data/db/migrate/20131227205140_create_abstractor_object_types.rb +10 -0
  70. data/db/migrate/20131227205219_create_abstractor_object_values.rb +10 -0
  71. data/db/migrate/20131227205256_create_abstractor_object_value_variants.rb +11 -0
  72. data/db/migrate/20131227205320_create_abstractor_relation_types.rb +10 -0
  73. data/db/migrate/20131227205354_create_abstractor_rule_types.rb +11 -0
  74. data/db/migrate/20131227205432_create_abstractor_abstraction_schemas.rb +13 -0
  75. data/db/migrate/20131227205456_create_abstractor_abstraction_schema_object_values.rb +10 -0
  76. data/db/migrate/20131227205529_create_abstractor_abstraction_schema_predicate_variants.rb +11 -0
  77. data/db/migrate/20131227205610_create_abstractor_abstraction_schema_relations.rb +11 -0
  78. data/db/migrate/20131227205652_create_abstractor_subjects.rb +12 -0
  79. data/db/migrate/20131227205732_create_abstractor_subject_relations.rb +11 -0
  80. data/db/migrate/20131227205831_create_abstractor_abstraction_sources.rb +11 -0
  81. data/db/migrate/20131227210211_create_abstractor_subject_groups.rb +10 -0
  82. data/db/migrate/20131227210244_create_abstractor_subject_group_members.rb +11 -0
  83. data/db/migrate/20131227210350_create_abstractor_abstraction_groups.rb +12 -0
  84. data/db/migrate/20131227210353_create_abstractor_abstraction_group_members.rb +11 -0
  85. data/db/migrate/20131227211050_create_abstractor_suggestion_statuses.rb +10 -0
  86. data/db/migrate/20131227211303_create_abstractor_abstractions.rb +15 -0
  87. data/db/migrate/20131227213427_create_abstractor_suggestions.rb +14 -0
  88. data/db/migrate/20131228041944_create_abstractor_suggestion_object_values.rb +11 -0
  89. data/db/migrate/20131228041945_create_abstractor_suggestion_sources.rb +16 -0
  90. data/db/seeds.rb +0 -0
  91. data/lib/abstractor.rb +8 -0
  92. data/lib/abstractor/abstractable.rb +190 -0
  93. data/lib/abstractor/core_ext/string.rb +99 -0
  94. data/lib/abstractor/engine.rb +14 -0
  95. data/lib/abstractor/methods/controllers/abstractor_abstraction_groups_controller.rb +37 -0
  96. data/lib/abstractor/methods/controllers/abstractor_abstractions_controller.rb +42 -0
  97. data/lib/abstractor/methods/controllers/abstractor_suggestions_controller.rb +28 -0
  98. data/lib/abstractor/methods/models/abstractor_abstraction.rb +65 -0
  99. data/lib/abstractor/methods/models/abstractor_abstraction_group.rb +37 -0
  100. data/lib/abstractor/methods/models/abstractor_abstraction_group_member.rb +17 -0
  101. data/lib/abstractor/methods/models/abstractor_abstraction_schema.rb +27 -0
  102. data/lib/abstractor/methods/models/abstractor_abstraction_schema_object_value.rb +17 -0
  103. data/lib/abstractor/methods/models/abstractor_abstraction_schema_predicate_variant.rb +16 -0
  104. data/lib/abstractor/methods/models/abstractor_abstraction_schema_relation.rb +18 -0
  105. data/lib/abstractor/methods/models/abstractor_abstraction_source.rb +29 -0
  106. data/lib/abstractor/methods/models/abstractor_object_type.rb +16 -0
  107. data/lib/abstractor/methods/models/abstractor_object_value.rb +24 -0
  108. data/lib/abstractor/methods/models/abstractor_object_value_variant.rb +16 -0
  109. data/lib/abstractor/methods/models/abstractor_relation_type.rb +16 -0
  110. data/lib/abstractor/methods/models/abstractor_rule_type.rb +17 -0
  111. data/lib/abstractor/methods/models/abstractor_subject.rb +258 -0
  112. data/lib/abstractor/methods/models/abstractor_subject_group.rb +19 -0
  113. data/lib/abstractor/methods/models/abstractor_subject_group_member.rb +17 -0
  114. data/lib/abstractor/methods/models/abstractor_subject_relation.rb +18 -0
  115. data/lib/abstractor/methods/models/abstractor_suggestion.rb +88 -0
  116. data/lib/abstractor/methods/models/abstractor_suggestion_object_value.rb +17 -0
  117. data/lib/abstractor/methods/models/abstractor_suggestion_source.rb +17 -0
  118. data/lib/abstractor/methods/models/abstractor_suggestion_status.rb +29 -0
  119. data/lib/abstractor/methods/models/soft_delete.rb +35 -0
  120. data/lib/abstractor/negation_detection.rb +43 -0
  121. data/lib/abstractor/parser.rb +76 -0
  122. data/lib/abstractor/setup.rb +24 -0
  123. data/lib/abstractor/user_interface.rb +40 -0
  124. data/lib/abstractor/utility.rb +8 -0
  125. data/lib/abstractor/version.rb +3 -0
  126. data/lib/generators/abstractor/install/install_generator.rb +118 -0
  127. data/lib/generators/abstractor/install/templates/controllers/abstractor_abstraction_groups_controller.rb +16 -0
  128. data/lib/generators/abstractor/install/templates/controllers/abstractor_abstractions_controller.rb +24 -0
  129. data/lib/generators/abstractor/install/templates/controllers/abstractor_suggestions_controller.rb +12 -0
  130. data/lib/generators/abstractor/install/templates/dictionaries.yml +235 -0
  131. data/lib/generators/abstractor/install/templates/helpers/abstractions_helper.rb +9 -0
  132. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction.rb +9 -0
  133. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group.rb +9 -0
  134. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group_member.rb +9 -0
  135. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema.rb +9 -0
  136. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_object_value.rb +9 -0
  137. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_predicate_variant.rb +9 -0
  138. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_relation.rb +9 -0
  139. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_source.rb +9 -0
  140. data/lib/generators/abstractor/install/templates/models/abstractor_object_type.rb +9 -0
  141. data/lib/generators/abstractor/install/templates/models/abstractor_object_value.rb +9 -0
  142. data/lib/generators/abstractor/install/templates/models/abstractor_object_value_variant.rb +9 -0
  143. data/lib/generators/abstractor/install/templates/models/abstractor_relation_type.rb +9 -0
  144. data/lib/generators/abstractor/install/templates/models/abstractor_rule_type.rb +9 -0
  145. data/lib/generators/abstractor/install/templates/models/abstractor_subject.rb +9 -0
  146. data/lib/generators/abstractor/install/templates/models/abstractor_subject_group.rb +9 -0
  147. data/lib/generators/abstractor/install/templates/models/abstractor_subject_group_member.rb +9 -0
  148. data/lib/generators/abstractor/install/templates/models/abstractor_subject_relation.rb +9 -0
  149. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion.rb +9 -0
  150. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_object_value.rb +9 -0
  151. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_source.rb +9 -0
  152. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_status.rb +9 -0
  153. data/lib/lingscope/build.xml +74 -0
  154. data/lib/lingscope/build/built-jar.properties +4 -0
  155. data/lib/lingscope/build/classes/lingscope/algorithms/AbnerTokenizer.class +0 -0
  156. data/lib/lingscope/build/classes/lingscope/algorithms/AnnotationComparer.class +0 -0
  157. data/lib/lingscope/build/classes/lingscope/algorithms/Annotator.class +0 -0
  158. data/lib/lingscope/build/classes/lingscope/algorithms/BaselineAnnotator.class +0 -0
  159. data/lib/lingscope/build/classes/lingscope/algorithms/BaselineCueAnnotator.class +0 -0
  160. data/lib/lingscope/build/classes/lingscope/algorithms/BaselineScopeAnnotator.class +0 -0
  161. data/lib/lingscope/build/classes/lingscope/algorithms/CrfAnnotator.class +0 -0
  162. data/lib/lingscope/build/classes/lingscope/algorithms/NegexAnnotator.class +0 -0
  163. data/lib/lingscope/build/classes/lingscope/algorithms/NegexCueAnnotator.class +0 -0
  164. data/lib/lingscope/build/classes/lingscope/algorithms/NegexScopeAnnotator.class +0 -0
  165. data/lib/lingscope/build/classes/lingscope/algorithms/PosTagger.class +0 -0
  166. data/lib/lingscope/build/classes/lingscope/algorithms/negex/GenNegEx.class +0 -0
  167. data/lib/lingscope/build/classes/lingscope/algorithms/negex/Sorter.class +0 -0
  168. data/lib/lingscope/build/classes/lingscope/drivers/AnnotatedFilesMerger.class +0 -0
  169. data/lib/lingscope/build/classes/lingscope/drivers/AnnotationComparerDriver.class +0 -0
  170. data/lib/lingscope/build/classes/lingscope/drivers/BaselineDriver.class +0 -0
  171. data/lib/lingscope/build/classes/lingscope/drivers/CrfDriver.class +0 -0
  172. data/lib/lingscope/build/classes/lingscope/drivers/CueAndPosFilesMerger.class +0 -0
  173. data/lib/lingscope/build/classes/lingscope/drivers/ModelTrainer.class +0 -0
  174. data/lib/lingscope/build/classes/lingscope/drivers/NegexDriver.class +0 -0
  175. data/lib/lingscope/build/classes/lingscope/drivers/PosTaggerDriver.class +0 -0
  176. data/lib/lingscope/build/classes/lingscope/drivers/SentencePosTagger.class +0 -0
  177. data/lib/lingscope/build/classes/lingscope/drivers/SentenceTagger.class +0 -0
  178. data/lib/lingscope/build/classes/lingscope/io/AnnotatedSentencesIO.class +0 -0
  179. data/lib/lingscope/build/classes/lingscope/structures/AnnotatedSentence.class +0 -0
  180. data/lib/lingscope/dist/README.TXT +32 -0
  181. data/lib/lingscope/dist/javadoc/allclasses-frame.html +80 -0
  182. data/lib/lingscope/dist/javadoc/allclasses-noframe.html +80 -0
  183. data/lib/lingscope/dist/javadoc/constant-values.html +199 -0
  184. data/lib/lingscope/dist/javadoc/deprecated-list.html +147 -0
  185. data/lib/lingscope/dist/javadoc/help-doc.html +224 -0
  186. data/lib/lingscope/dist/javadoc/index-files/index-1.html +188 -0
  187. data/lib/lingscope/dist/javadoc/index-files/index-10.html +149 -0
  188. data/lib/lingscope/dist/javadoc/index-files/index-11.html +158 -0
  189. data/lib/lingscope/dist/javadoc/index-files/index-12.html +157 -0
  190. data/lib/lingscope/dist/javadoc/index-files/index-13.html +177 -0
  191. data/lib/lingscope/dist/javadoc/index-files/index-14.html +155 -0
  192. data/lib/lingscope/dist/javadoc/index-files/index-15.html +152 -0
  193. data/lib/lingscope/dist/javadoc/index-files/index-16.html +146 -0
  194. data/lib/lingscope/dist/javadoc/index-files/index-2.html +158 -0
  195. data/lib/lingscope/dist/javadoc/index-files/index-3.html +165 -0
  196. data/lib/lingscope/dist/javadoc/index-files/index-4.html +146 -0
  197. data/lib/lingscope/dist/javadoc/index-files/index-5.html +219 -0
  198. data/lib/lingscope/dist/javadoc/index-files/index-6.html +149 -0
  199. data/lib/lingscope/dist/javadoc/index-files/index-7.html +155 -0
  200. data/lib/lingscope/dist/javadoc/index-files/index-8.html +185 -0
  201. data/lib/lingscope/dist/javadoc/index-files/index-9.html +164 -0
  202. data/lib/lingscope/dist/javadoc/index.html +74 -0
  203. data/lib/lingscope/dist/javadoc/lingscope/algorithms/AbnerTokenizer.html +280 -0
  204. data/lib/lingscope/dist/javadoc/lingscope/algorithms/AnnotationComparer.html +526 -0
  205. data/lib/lingscope/dist/javadoc/lingscope/algorithms/Annotator.html +401 -0
  206. data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineAnnotator.html +375 -0
  207. data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineCueAnnotator.html +309 -0
  208. data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineScopeAnnotator.html +340 -0
  209. data/lib/lingscope/dist/javadoc/lingscope/algorithms/CrfAnnotator.html +340 -0
  210. data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexAnnotator.html +364 -0
  211. data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexCueAnnotator.html +309 -0
  212. data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexScopeAnnotator.html +309 -0
  213. data/lib/lingscope/dist/javadoc/lingscope/algorithms/PosTagger.html +268 -0
  214. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AbnerTokenizer.html +145 -0
  215. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AnnotationComparer.html +145 -0
  216. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/Annotator.html +299 -0
  217. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineAnnotator.html +189 -0
  218. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineCueAnnotator.html +145 -0
  219. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineScopeAnnotator.html +145 -0
  220. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/CrfAnnotator.html +145 -0
  221. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexAnnotator.html +189 -0
  222. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexCueAnnotator.html +145 -0
  223. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexScopeAnnotator.html +145 -0
  224. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/PosTagger.html +145 -0
  225. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/GenNegEx.html +369 -0
  226. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/Sorter.html +253 -0
  227. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/GenNegEx.html +181 -0
  228. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/Sorter.html +145 -0
  229. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-frame.html +35 -0
  230. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-summary.html +165 -0
  231. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-tree.html +154 -0
  232. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-use.html +174 -0
  233. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-frame.html +53 -0
  234. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-summary.html +198 -0
  235. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-tree.html +160 -0
  236. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-use.html +202 -0
  237. data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotatedFilesMerger.html +284 -0
  238. data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotationComparerDriver.html +258 -0
  239. data/lib/lingscope/dist/javadoc/lingscope/drivers/BaselineDriver.html +260 -0
  240. data/lib/lingscope/dist/javadoc/lingscope/drivers/CrfDriver.html +262 -0
  241. data/lib/lingscope/dist/javadoc/lingscope/drivers/CueAndPosFilesMerger.html +310 -0
  242. data/lib/lingscope/dist/javadoc/lingscope/drivers/ModelTrainer.html +281 -0
  243. data/lib/lingscope/dist/javadoc/lingscope/drivers/NegexDriver.html +262 -0
  244. data/lib/lingscope/dist/javadoc/lingscope/drivers/PosTaggerDriver.html +289 -0
  245. data/lib/lingscope/dist/javadoc/lingscope/drivers/SentencePosTagger.html +313 -0
  246. data/lib/lingscope/dist/javadoc/lingscope/drivers/SentenceTagger.html +512 -0
  247. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotatedFilesMerger.html +145 -0
  248. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotationComparerDriver.html +145 -0
  249. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/BaselineDriver.html +145 -0
  250. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CrfDriver.html +145 -0
  251. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CueAndPosFilesMerger.html +145 -0
  252. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/ModelTrainer.html +145 -0
  253. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/NegexDriver.html +145 -0
  254. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/PosTaggerDriver.html +145 -0
  255. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentencePosTagger.html +145 -0
  256. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentenceTagger.html +145 -0
  257. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-frame.html +51 -0
  258. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-summary.html +195 -0
  259. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-tree.html +154 -0
  260. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-use.html +145 -0
  261. data/lib/lingscope/dist/javadoc/lingscope/io/AnnotatedSentencesIO.html +285 -0
  262. data/lib/lingscope/dist/javadoc/lingscope/io/class-use/AnnotatedSentencesIO.html +145 -0
  263. data/lib/lingscope/dist/javadoc/lingscope/io/package-frame.html +33 -0
  264. data/lib/lingscope/dist/javadoc/lingscope/io/package-summary.html +158 -0
  265. data/lib/lingscope/dist/javadoc/lingscope/io/package-tree.html +154 -0
  266. data/lib/lingscope/dist/javadoc/lingscope/io/package-use.html +145 -0
  267. data/lib/lingscope/dist/javadoc/lingscope/structures/AnnotatedSentence.html +425 -0
  268. data/lib/lingscope/dist/javadoc/lingscope/structures/class-use/AnnotatedSentence.html +423 -0
  269. data/lib/lingscope/dist/javadoc/lingscope/structures/package-frame.html +33 -0
  270. data/lib/lingscope/dist/javadoc/lingscope/structures/package-summary.html +158 -0
  271. data/lib/lingscope/dist/javadoc/lingscope/structures/package-tree.html +154 -0
  272. data/lib/lingscope/dist/javadoc/lingscope/structures/package-use.html +209 -0
  273. data/lib/lingscope/dist/javadoc/overview-frame.html +51 -0
  274. data/lib/lingscope/dist/javadoc/overview-summary.html +168 -0
  275. data/lib/lingscope/dist/javadoc/overview-tree.html +159 -0
  276. data/lib/lingscope/dist/javadoc/package-list +5 -0
  277. data/lib/lingscope/dist/javadoc/resources/inherit.gif +0 -0
  278. data/lib/lingscope/dist/javadoc/stylesheet.css +29 -0
  279. data/lib/lingscope/dist/lib/abner.jar +0 -0
  280. data/lib/lingscope/dist/lib/commons-lang-2.4.jar +0 -0
  281. data/lib/lingscope/dist/lib/java_utils.jar +0 -0
  282. data/lib/lingscope/dist/lib/sptoolkit.jar +0 -0
  283. data/lib/lingscope/dist/lib/stanford-postagger.jar +0 -0
  284. data/lib/lingscope/dist/lingscope.jar +0 -0
  285. data/lib/lingscope/lingscope.zip +0 -0
  286. data/lib/lingscope/manifest.mf +3 -0
  287. data/lib/lingscope/nbproject/build-impl.xml +1338 -0
  288. data/lib/lingscope/nbproject/genfiles.properties +8 -0
  289. data/lib/lingscope/nbproject/private/config.properties +0 -0
  290. data/lib/lingscope/nbproject/private/private.properties +10 -0
  291. data/lib/lingscope/nbproject/private/private.xml +5 -0
  292. data/lib/lingscope/nbproject/project.properties +85 -0
  293. data/lib/lingscope/nbproject/project.xml +15 -0
  294. data/lib/lingscope/negation_models.zip +0 -0
  295. data/lib/lingscope/negation_models/baseline_cue_all_both.model +49 -0
  296. data/lib/lingscope/negation_models/baseline_cue_all_clinical.model +19 -0
  297. data/lib/lingscope/negation_models/crf_cue_all_both.model +0 -0
  298. data/lib/lingscope/negation_models/crf_cue_all_clinical.model +0 -0
  299. data/lib/lingscope/negation_models/crf_scope_cue_crf_all_both.model +0 -0
  300. data/lib/lingscope/negation_models/crf_scope_cue_crf_all_clinical.model +0 -0
  301. data/lib/lingscope/negation_models/crf_scope_cue_regex_all_both.model +0 -0
  302. data/lib/lingscope/negation_models/crf_scope_cue_regex_all_clinical.model +0 -0
  303. data/lib/lingscope/negation_models/crf_scope_words_all_both.model +0 -0
  304. data/lib/lingscope/negation_models/crf_scope_words_all_clinical.model +0 -0
  305. data/lib/lingscope/negation_models/crf_scope_words_crf_all_both.model +0 -0
  306. data/lib/lingscope/negation_models/crf_scope_words_crf_all_clinical.model +0 -0
  307. data/lib/lingscope/negation_models/crf_scope_words_regex_all_both.model +0 -0
  308. data/lib/lingscope/negation_models/crf_scope_words_regex_all_clinical.model +0 -0
  309. data/lib/lingscope/src/lingscope/algorithms/AbnerTokenizer.java +58 -0
  310. data/lib/lingscope/src/lingscope/algorithms/AnnotationComparer.java +207 -0
  311. data/lib/lingscope/src/lingscope/algorithms/Annotator.java +51 -0
  312. data/lib/lingscope/src/lingscope/algorithms/BaselineAnnotator.java +80 -0
  313. data/lib/lingscope/src/lingscope/algorithms/BaselineCueAnnotator.java +84 -0
  314. data/lib/lingscope/src/lingscope/algorithms/BaselineScopeAnnotator.java +101 -0
  315. data/lib/lingscope/src/lingscope/algorithms/CrfAnnotator.java +45 -0
  316. data/lib/lingscope/src/lingscope/algorithms/NegexAnnotator.java +52 -0
  317. data/lib/lingscope/src/lingscope/algorithms/NegexCueAnnotator.java +26 -0
  318. data/lib/lingscope/src/lingscope/algorithms/NegexScopeAnnotator.java +26 -0
  319. data/lib/lingscope/src/lingscope/algorithms/PosTagger.java +54 -0
  320. data/lib/lingscope/src/lingscope/algorithms/negex/GenNegEx.java +530 -0
  321. data/lib/lingscope/src/lingscope/algorithms/negex/Sorter.java +36 -0
  322. data/lib/lingscope/src/lingscope/drivers/AnnotatedFilesMerger.java +61 -0
  323. data/lib/lingscope/src/lingscope/drivers/AnnotationComparerDriver.java +22 -0
  324. data/lib/lingscope/src/lingscope/drivers/BaselineDriver.java +45 -0
  325. data/lib/lingscope/src/lingscope/drivers/CrfDriver.java +31 -0
  326. data/lib/lingscope/src/lingscope/drivers/CueAndPosFilesMerger.java +86 -0
  327. data/lib/lingscope/src/lingscope/drivers/ModelTrainer.java +39 -0
  328. data/lib/lingscope/src/lingscope/drivers/NegexDriver.java +32 -0
  329. data/lib/lingscope/src/lingscope/drivers/PosTaggerDriver.java +62 -0
  330. data/lib/lingscope/src/lingscope/drivers/SentencePosTagger.java +89 -0
  331. data/lib/lingscope/src/lingscope/drivers/SentenceTagger.java +158 -0
  332. data/lib/lingscope/src/lingscope/io/AnnotatedSentencesIO.java +53 -0
  333. data/lib/lingscope/src/lingscope/structures/AnnotatedSentence.java +105 -0
  334. data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.csv +1 -0
  335. data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.txt +2273 -0
  336. data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.csv +1 -0
  337. data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.txt +1342 -0
  338. data/lib/setup/data/ICD-O-3_CSV-metadata/icd-o3 readme.txt +21 -0
  339. data/lib/setup/data/custom_site_synonyms.csv +1 -0
  340. data/lib/setup/data/diagnosis_categorizations.csv +1 -0
  341. data/lib/setup/data/high_level_primary_cns_diagnosis_categorizations.yml +597 -0
  342. data/lib/setup/data/icdo3_sites_with_laterality.txt +11 -0
  343. data/lib/setup/data/site_site_categories.txt +28 -0
  344. data/lib/tasks/abstractor_tasks.rake +22 -0
  345. data/lib/tasks/cucumber.rake +65 -0
  346. metadata +754 -0
@@ -0,0 +1,36 @@
1
+ package lingscope.algorithms.negex;
2
+
3
+ import java.util.*;
4
+
5
+ // Utility class to sort the negation rules by length in descending order.
6
+ // Rules need to be matched by longest first because there is overlap between the
7
+ // RegEx of the rules.
8
+ //
9
+ // Author: Imre Solti
10
+ // solti@u.washington.edu
11
+ // Date: 10/20/2008
12
public class Sorter {

    /**
     * Orders the negation rules from longest to shortest, in place.
     *
     * <p>The rule regular expressions overlap, so the longest rule has to be
     * tried first. Length is measured on the trimmed rule; the stored strings
     * themselves are left untrimmed. Rules with the same trimmed length keep
     * their original relative order, and a {@code null} entry is treated as
     * length zero so that it sorts to the end instead of aborting the sort
     * half-way through.
     *
     * @param unsortedRules modifiable list of rules; it is reordered in place
     * @return the same list instance that was passed in, now sorted; a
     *         {@code null} list or a list with fewer than two entries is
     *         returned untouched
     * @throws UnsupportedOperationException if the list does not allow its
     *         elements to be replaced
     */
    public List<String> sortRules(List<String> unsortedRules) {
        if (unsortedRules == null || unsortedRules.size() < 2) {
            // Nothing to reorder.
            return unsortedRules;
        }
        Collections.sort(unsortedRules, new Comparator<String>() {
            @Override
            public int compare(String first, String second) {
                int firstLength = trimmedLength(first);
                int secondLength = trimmedLength(second);
                if (firstLength == secondLength) {
                    return 0;
                }
                // Descending order: the longer rule comes first.
                return firstLength > secondLength ? -1 : 1;
            }
        });
        return unsortedRules;
    }

    /** Returns the length of the trimmed rule, or zero for a null rule. */
    private static int trimmedLength(String rule) {
        return rule == null ? 0 : rule.trim().length();
    }
}
@@ -0,0 +1,61 @@
1
+ package lingscope.drivers;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.List;
5
+ import lingscope.io.AnnotatedSentencesIO;
6
+ import lingscope.structures.AnnotatedSentence;
7
+
8
+ /**
9
+ * Merges two annotated files. Useful to merge a words scope file with a POS cue
10
+ * file
11
+ * @author shashank
12
+ */
13
+ public class AnnotatedFilesMerger {
14
+
15
+ /**
16
+ * Merges the given wordsSentence and the given tagsSentence
17
+ * @param wordsSentence
18
+ * @param tagsSentence
19
+ * @return
20
+ */
21
+ public static AnnotatedSentence merge(AnnotatedSentence wordsSentence, AnnotatedSentence tagsSentence) {
22
+ List<String> words = wordsSentence.getWords();
23
+ List<String> tags = tagsSentence.getTags();
24
+ int numTokens = words.size();
25
+ if (tags.size() != numTokens) {
26
+ System.err.println("Skipping non-equal length sentences");
27
+ System.err.println("\tSentence 1: " + wordsSentence.getRawText());
28
+ System.err.println("\tSentence 2: " + tagsSentence.getRawText());
29
+ return null;
30
+ }
31
+ StringBuilder mergedSentence = new StringBuilder();
32
+ for (int j = 0; j < numTokens; ++j) {
33
+ mergedSentence.append(" ").append(words.get(j)).append("|").append(tags.get(j));
34
+ }
35
+ return new AnnotatedSentence(mergedSentence.substring(1));
36
+ }
37
+
38
+ /**
39
+ *
40
+ * @param args
41
+ * 0 - file 1: the file from which words will be taken
42
+ * 1 - file 2: the file from which tags will be taken
43
+ * 2 - output file path
44
+ */
45
+ public static void main(String[] args) {
46
+ List<AnnotatedSentence> wordsSentences = AnnotatedSentencesIO.read(args[0]);
47
+ List<AnnotatedSentence> tagsSentences = AnnotatedSentencesIO.read(args[1]);
48
+ int numSentences = tagsSentences.size();
49
+ List<AnnotatedSentence> mergedSentences = new ArrayList<AnnotatedSentence>(numSentences);
50
+ for (int i = 0; i < numSentences; ++i) {
51
+ AnnotatedSentence wordsSentence = wordsSentences.get(i);
52
+ AnnotatedSentence tagsSentence = tagsSentences.get(i);
53
+ AnnotatedSentence mergedSentence = merge(wordsSentence, tagsSentence);
54
+ if (mergedSentence == null) {
55
+ continue;
56
+ }
57
+ mergedSentences.add(mergedSentence);
58
+ }
59
+ AnnotatedSentencesIO.write(args[2], mergedSentences);
60
+ }
61
+ }
@@ -0,0 +1,22 @@
1
+ package lingscope.drivers;
2
+
3
+ import lingscope.algorithms.AnnotationComparer;
4
+
5
+ /**
6
+ * Compares annotations between a gold and test file
7
+ * @author shashank
8
+ */
9
+ public class AnnotationComparerDriver {
10
+
11
+ /**
12
+ *
13
+ * @param args
14
+ * 0 - gold file
15
+ * 1 - test file
16
+ */
17
+ public static void main(String[] args) {
18
+ AnnotationComparer comparer = new AnnotationComparer(10);
19
+ comparer.compareAnnotationFiles(args[0], args[1]);
20
+ comparer.printStats();
21
+ }
22
+ }
@@ -0,0 +1,45 @@
1
+ package lingscope.drivers;
2
+
3
+ import java.util.List;
4
+ import lingscope.algorithms.Annotator;
5
+ import lingscope.algorithms.BaselineScopeAnnotator;
6
+ import lingscope.io.AnnotatedSentencesIO;
7
+ import lingscope.structures.AnnotatedSentence;
8
+
9
+ /**
10
+ *
11
+ * @author shashank
12
+ */
13
+ public class BaselineDriver {
14
+
15
+ /**
16
+ *
17
+ * @param args
18
+ * 0 - Annotator type ("cue" or "scope")
19
+ * 1 - Serialized annotator file
20
+ * 2 - Input file
21
+ * 3 - Output file
22
+ * 4 - if annotator type is "scope", then should scope be limited by commas
23
+ * 5 - if annotator type is "scope", then should scope be limited by periods
24
+ * 6 - (boolean) does the input file contain annotated sentence (true) or
25
+ * not (false)
26
+ */
27
+ public static void main(String[] args) {
28
+ Annotator annotator;
29
+
30
+ if (args[0].equalsIgnoreCase("scope")) {
31
+ annotator = new BaselineScopeAnnotator(SentenceTagger.SCOPE_START,
32
+ SentenceTagger.SCOPE_INTER, SentenceTagger.OTHER,
33
+ Boolean.parseBoolean(args[4]), Boolean.parseBoolean(args[5]));
34
+ } else {
35
+ annotator = SentenceTagger.getAnnotator("baseline", args[0]);
36
+ }
37
+ annotator.loadAnnotator(args[1]);
38
+
39
+ boolean isAnnotated = Boolean.parseBoolean(args[6]);
40
+ List<String> inputSentences = SentenceTagger.getStringList(args[2], isAnnotated);
41
+ List<AnnotatedSentence> outputSentences = SentenceTagger.annotateSentences(annotator, inputSentences, isAnnotated);
42
+ AnnotatedSentencesIO.write(args[3], outputSentences);
43
+
44
+ }
45
+ }
@@ -0,0 +1,31 @@
1
+ package lingscope.drivers;
2
+
3
+ import java.util.List;
4
+ import lingscope.algorithms.Annotator;
5
+ import lingscope.io.AnnotatedSentencesIO;
6
+ import lingscope.structures.AnnotatedSentence;
7
+
8
+ /**
9
+ * Driver to mark scope or cue in a file
10
+ * @author shashank
11
+ */
12
+ public class CrfDriver {
13
+ /**
14
+ *
15
+ * @param args
16
+ * 0 - Annotator type ("cue" or "scope")
17
+ * 1 - Serialized annotator file
18
+ * 2 - Input file
19
+ * 3 - Output file
20
+ * 4 - (boolean) does the input file contain annotated sentence (true) or
21
+ * not (false)
22
+ */
23
+ public static void main(String[] args) {
24
+ Annotator annotator = SentenceTagger.getAnnotator("crf", args[0]);
25
+ annotator.loadAnnotator(args[1]);
26
+ boolean isAnnotated = Boolean.parseBoolean(args[4]);
27
+ List<String> inputSentences = SentenceTagger.getStringList(args[2], isAnnotated);
28
+ List<AnnotatedSentence> outputSentences = SentenceTagger.annotateSentences(annotator, inputSentences, isAnnotated);
29
+ AnnotatedSentencesIO.write(args[3], outputSentences);
30
+ }
31
+ }
@@ -0,0 +1,86 @@
1
+ package lingscope.drivers;
2
+
3
+ import generalutils.FileOperations;
4
+ import java.util.ArrayList;
5
+ import java.util.List;
6
+ import java.util.logging.Level;
7
+ import java.util.logging.Logger;
8
+ import lingscope.io.AnnotatedSentencesIO;
9
+ import lingscope.structures.AnnotatedSentence;
10
+
11
+ /**
12
+ * Merges two files, one containing pos tags and the other containing
13
+ * annotated cues
14
+ * @author shashank
15
+ */
16
+ public class CueAndPosFilesMerger {
17
+
18
+ public static AnnotatedSentence merge(AnnotatedSentence cueSentence, String posSentence, boolean replaceTags) {
19
+ String[] posTags = posSentence.split("\\s+");
20
+ List<String> crfTags = cueSentence.getTags();
21
+ List<String> words = cueSentence.getWords();
22
+ List<Boolean> tagStatusList = cueSentence.getIsAnnotatedTags();
23
+ StringBuilder mergedSentence = new StringBuilder();
24
+ int numWords = posTags.length;
25
+ for (int j = 0; j < numWords; ++j) {
26
+ mergedSentence.append(" ");
27
+ String posTag = posTags[j];
28
+ String word = words.get(j);
29
+ String crfTag = crfTags.get(j);
30
+ boolean tagStatus = tagStatusList.get(j);
31
+ if (tagStatus) {
32
+ if (replaceTags) {
33
+ mergedSentence.append("CUE|");
34
+ } else {
35
+ mergedSentence.append(word).append("|");
36
+ }
37
+ } else {
38
+ mergedSentence.append(posTag).append("|");
39
+ }
40
+ mergedSentence.append(crfTag);
41
+ }
42
+
43
+ AnnotatedSentence mergedAnnotatedSentence = new AnnotatedSentence(mergedSentence.substring(1));
44
+ return mergedAnnotatedSentence;
45
+ }
46
+
47
+ /**
48
+ * Merges the cueSentences and posSentences
49
+ * @param cueSentences
50
+ * @param posSentences
51
+ * @param replaceTags
52
+ * @return
53
+ */
54
+ public static List<AnnotatedSentence> merge(List<AnnotatedSentence> cueSentences, List<String> posSentences, boolean replaceTags) {
55
+ List<AnnotatedSentence> mergedSentences = new ArrayList<AnnotatedSentence>(cueSentences.size());
56
+ int numSentences = posSentences.size();
57
+ for (int i = 0; i < numSentences; ++i) {
58
+ AnnotatedSentence cueSentence = cueSentences.get(i);
59
+ String posSentence = posSentences.get(i);
60
+ AnnotatedSentence mergedAnnotatedSentence = merge(cueSentence, posSentence, replaceTags);
61
+ mergedSentences.add(mergedAnnotatedSentence);
62
+ }
63
+ return mergedSentences;
64
+ }
65
+
66
+ /**
67
+ *
68
+ * @param args
69
+ * 0 - cue input file
70
+ * 1 - pos input file
71
+ * 2 - replace cue with custom tag 'CUE' (true) or leave it as it is (false)
72
+ * 3 - merged file output path
73
+ */
74
+ public static void main(String[] args) {
75
+ boolean replaceTags = Boolean.parseBoolean(args[2]);
76
+ List<AnnotatedSentence> cueSentences = AnnotatedSentencesIO.read(args[0]);
77
+ List<String> posSentences = null;
78
+ try {
79
+ posSentences = FileOperations.readFile(args[1]);
80
+ } catch (Exception ex) {
81
+ Logger.getLogger(CueAndPosFilesMerger.class.getName()).log(Level.SEVERE, null, ex);
82
+ System.exit(1);
83
+ }
84
+ AnnotatedSentencesIO.write(args[3], merge(cueSentences, posSentences, replaceTags));
85
+ }
86
+ }
@@ -0,0 +1,39 @@
1
+ package lingscope.drivers;
2
+
3
+ import lingscope.algorithms.Annotator;
4
+
5
+ /**
6
+ * Driver to train a model file. The training data will have to be provided.
7
+ * @author shashank
8
+ */
9
+ public class ModelTrainer {
10
+
11
+ /**
12
+ * Prints the usage for the model trainer
13
+ */
14
+ public static void usage() {
15
+ System.out.println("Usage:\njava lingscope.drivers.ModelTrainer (cue|scope) (crf|baseline|negex) training_data_file file_where_model_will_be_saved");
16
+ }
17
+
18
+ /**
19
+ *
20
+ * @param args
21
+ * 0 - Annotator type ("cue" or "scope")
22
+ * 1 - Model type ("crf", "baseline" or "negex")
23
+ * 2 - File from which training data will be read
24
+ * 2 - File where the model will be saved
25
+ */
26
+ public static void main(String[] args) {
27
+ if (args.length != 4) {
28
+ usage();
29
+ System.exit(0);
30
+ }
31
+ Annotator annotator = SentenceTagger.getAnnotator(args[1], args[0]);
32
+ if (annotator == null) {
33
+ usage();
34
+ System.exit(1);
35
+ }
36
+ annotator.serializeAnnotator(args[2], args[3]);
37
+ }
38
+
39
+ }
@@ -0,0 +1,32 @@
1
+ package lingscope.drivers;
2
+
3
+ import java.util.List;
4
+ import lingscope.algorithms.Annotator;
5
+ import lingscope.io.AnnotatedSentencesIO;
6
+ import lingscope.structures.AnnotatedSentence;
7
+
8
+ /**
9
+ * Runs the annotators
10
+ * @author shashank
11
+ */
12
+ public class NegexDriver {
13
+
14
+ /**
15
+ *
16
+ * @param args
17
+ * 0 - Annotator type ("cue" or "scope")
18
+ * 1 - Serialized annotator file
19
+ * 2 - Input file
20
+ * 3 - Output file
21
+ * 4 - (boolean) does the input file contain annotated sentence (true) or
22
+ * not (false)
23
+ */
24
+ public static void main(String[] args) {
25
+ Annotator annotator = SentenceTagger.getAnnotator("negex", args[0]);
26
+ annotator.loadAnnotator(args[1]);
27
+ boolean isAnnotated = Boolean.parseBoolean(args[4]);
28
+ List<String> inputSentences = SentenceTagger.getStringList(args[2], isAnnotated);
29
+ List<AnnotatedSentence> outputSentences = SentenceTagger.annotateSentences(annotator, inputSentences, isAnnotated);
30
+ AnnotatedSentencesIO.write(args[3], outputSentences);
31
+ }
32
+ }
@@ -0,0 +1,62 @@
1
+ package lingscope.drivers;
2
+
3
+ import generalutils.FileOperations;
4
+ import java.util.ArrayList;
5
+ import java.util.List;
6
+ import java.util.logging.Level;
7
+ import java.util.logging.Logger;
8
+ import lingscope.algorithms.PosTagger;
9
+
10
+ /**
11
+ * Driver for the Part of Speech tagger. Use this to tag all sentences in a
12
+ * given file with part of speech tags
13
+ * @author shashank
14
+ */
15
+ public class PosTaggerDriver {
16
+
17
+ private static PosTagger posTagger = null;
18
+
19
+ /**
20
+ * Gets the equivalent POS sentence for the given sentenceToTag
21
+ * @param grammerFile file containing POS model
22
+ * @param sentenceToTag sentence for which POS tags will be returned
23
+ * @param is
24
+ * @return a string where the words from sentenceToTag are replaced with
25
+ * corresponding part of speech tags
26
+ */
27
+ public static String getTaggedSentence(String grammerFile, String sentenceToTag, boolean isTokenized) {
28
+ if (posTagger == null) {
29
+ posTagger = new PosTagger(grammerFile);
30
+ }
31
+ List<String> posTags = posTagger.replaceWordsWithPos(sentenceToTag, isTokenized);
32
+ StringBuilder posSentence = new StringBuilder();
33
+ for (String posTag : posTags) {
34
+ posSentence.append(" ").append(posTag);
35
+ }
36
+ return posSentence.substring(1);
37
+ }
38
+
39
+ /**
40
+ *
41
+ * @param args
42
+ * 0 - file containing the part of speech model
43
+ * 1 - input file
44
+ * 2 - output file
45
+ * 3 - (boolean) does the input file contain annotated sentence (true) or
46
+ * not (false)
47
+ */
48
+ public static void main(String[] args) {
49
+ String grammarFile = args[0];
50
+ List<String> inputSentences = SentenceTagger.getStringList(args[1], Boolean.parseBoolean(args[3]));
51
+ List<String> outputSentences = new ArrayList<String>(inputSentences.size());
52
+ for (String inputSentence : inputSentences) {
53
+ String outputSentence = getTaggedSentence(grammarFile, inputSentence, Boolean.parseBoolean(args[3]));
54
+ outputSentences.add(outputSentence);
55
+ }
56
+ try {
57
+ FileOperations.writeFile(args[2], outputSentences);
58
+ } catch (Exception ex) {
59
+ Logger.getLogger(PosTaggerDriver.class.getName()).log(Level.SEVERE, null, ex);
60
+ }
61
+ }
62
+ }
@@ -0,0 +1,89 @@
1
+ package lingscope.drivers;
2
+
3
+ import generalutils.FileOperations;
4
+ import java.util.List;
5
+ import lingscope.algorithms.Annotator;
6
+ import lingscope.structures.AnnotatedSentence;
7
+
8
+ /**
9
+ * Use this sentence tagger when using a model that tags POS
10
+ * @author shashank
11
+ */
12
+ public class SentencePosTagger {
13
+
14
+ public static void usage() {
15
+ System.out.println("java lingscope.drivers.SentencePosTagger cue_tagging_model "
16
+ + "cue_tagger_type(baseline|crf|negex) "
17
+ + "replace_cue_with_custom_tag(true|false) scope_tagging_model "
18
+ + "pos_model_file sentence_to_tag");
19
+ System.out.println("\tSaved model for negation can be obtained from http://negscope.askhermes.org/");
20
+ System.out.println("\tSaved model for speculation can be obtained from http://hedgescope.askhermes.org/");
21
+ System.out.println("\tSaved model for NegEx can be obtained from http://code.google.com/p/negex/downloads/list");
22
+ System.out.println("\tSaved pos_model_file can be obtained from http://hedgescope.askhermes.org/");
23
+ }
24
+
25
+ /**
26
+ *
27
+ * @param args
28
+ * 0 - cue tagging model
29
+ * 1 - cue tagger type (baseline, crf or negex)
30
+ * 2 - replace cue words with custom tag CUE (true) or not (false)
31
+ * 3 - crf pos-based scope tagging model
32
+ * 4 - POS model file
33
+ * 5 - sentence to tag
34
+ */
35
+ public static void main(String[] args) {
36
+ if (args[0].equalsIgnoreCase("help")) {
37
+ usage();
38
+ System.exit(0);
39
+ } else if (args.length < 6) {
40
+ usage();
41
+ System.exit(1);
42
+ }
43
+ Annotator cueAnnotator = SentenceTagger.getAnnotator(args[1], "cue");
44
+ cueAnnotator.loadAnnotator(args[0]);
45
+ Annotator scopeAnnotator = SentenceTagger.getAnnotator("crf", "scope");
46
+ scopeAnnotator.loadAnnotator(args[3]);
47
+ String sentence = args[5];
48
+ String grammarFile = args[4];
49
+
50
+ if ("file".equalsIgnoreCase(sentence)) {
51
+ String sentencesFile = args[6];
52
+ try {
53
+ List<String> sentences = FileOperations.readFile(sentencesFile);
54
+ for (String sentenceText : sentences) {
55
+ tagSentence(sentenceText, grammarFile,
56
+ Boolean.parseBoolean(args[2]), cueAnnotator,
57
+ scopeAnnotator);
58
+ }
59
+ } catch (Exception ex) {
60
+ ex.printStackTrace(System.err);
61
+ }
62
+ } else {
63
+ tagSentence(sentence, grammarFile, Boolean.parseBoolean(args[2]),
64
+ cueAnnotator, scopeAnnotator);
65
+ }
66
+ }
67
+
68
+ /**
69
+ * Tags the given sentence
70
+ * @param sentence the text of the sentence to tag
71
+ * @param grammarFile path to the Stanford part of speech model file
72
+ * @param replaceCueWords if true, cue words will be replaced with custom
73
+ * tag 'CUE'
74
+ * @param cueAnnotator the {@link Annotator} object to identify negation or
75
+ * hedge cue in the sentence
76
+ * @param scopeAnnotator the {@link Annotator} object to identify negation
77
+ * or hedge scope in the sentence
78
+ */
79
+ public static void tagSentence(String sentence, String grammarFile,
80
+ boolean replaceCueWords, Annotator cueAnnotator, Annotator scopeAnnotator) {
81
+ String posSentence = PosTaggerDriver.getTaggedSentence(grammarFile, sentence, false);
82
+ AnnotatedSentence cueTaggedSentence = cueAnnotator.annotateSentence(sentence, false);
83
+ AnnotatedSentence posCueMerged = CueAndPosFilesMerger.merge(cueTaggedSentence, posSentence, replaceCueWords);
84
+ AnnotatedSentence scopeMarkedSentence = scopeAnnotator.annotateSentence(posCueMerged.getSentenceText(), true);
85
+ AnnotatedSentence scopeWordsMarkedSentence = AnnotatedFilesMerger.merge(cueTaggedSentence, scopeMarkedSentence);
86
+ System.out.println(scopeWordsMarkedSentence.getRawText());
87
+
88
+ }
89
+ }