abstractor 1.0.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (346) hide show
  1. checksums.yaml +15 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +14 -0
  4. data/Rakefile +33 -0
  5. data/app/assets/images/abstractor/add.png +0 -0
  6. data/app/assets/images/abstractor/ajax-loader.gif +0 -0
  7. data/app/assets/images/abstractor/bar.gif +0 -0
  8. data/app/assets/images/abstractor/bti_logo.jpg +0 -0
  9. data/app/assets/images/abstractor/bullet_arrow_down.png +0 -0
  10. data/app/assets/images/abstractor/bullet_arrow_up.png +0 -0
  11. data/app/assets/images/abstractor/cog.png +0 -0
  12. data/app/assets/images/abstractor/delete.png +0 -0
  13. data/app/assets/images/abstractor/edit.png +0 -0
  14. data/app/assets/images/abstractor/excel.png +0 -0
  15. data/app/assets/images/abstractor/favicon.ico +0 -0
  16. data/app/assets/images/abstractor/greencheck.gif +0 -0
  17. data/app/assets/images/abstractor/loading.gif +0 -0
  18. data/app/assets/images/abstractor/nu_logo.jpg +0 -0
  19. data/app/assets/images/abstractor/nubic_logo.png +0 -0
  20. data/app/assets/images/abstractor/page.png +0 -0
  21. data/app/assets/images/abstractor/rails.png +0 -0
  22. data/app/assets/images/abstractor/robert_h_lurie.jpg +0 -0
  23. data/app/assets/images/abstractor/show.png +0 -0
  24. data/app/assets/images/abstractor/switch_minus.gif +0 -0
  25. data/app/assets/images/abstractor/switch_plus.gif +0 -0
  26. data/app/assets/javascripts/abstractor/abstractor.js +89 -0
  27. data/app/assets/javascripts/abstractor/application.js +21 -0
  28. data/app/assets/javascripts/abstractor/combobox.js +301 -0
  29. data/app/assets/javascripts/abstractor/jquery/jquery.form.js +1074 -0
  30. data/app/assets/javascripts/abstractor/nested_attributes.js +69 -0
  31. data/app/assets/stylesheets/abstractor/abstractor_abstractions.css +312 -0
  32. data/app/assets/stylesheets/abstractor/application.css +21 -0
  33. data/app/assets/stylesheets/abstractor/jquery.ui.override.css.scss +73 -0
  34. data/app/controllers/abstractor/abstractor_abstraction_groups_controller.rb +5 -0
  35. data/app/controllers/abstractor/abstractor_abstractions_controller.rb +5 -0
  36. data/app/controllers/abstractor/abstractor_suggestions_controller.rb +5 -0
  37. data/app/controllers/abstractor/application_controller.rb +2 -0
  38. data/app/helpers/abstractor/application_helper.rb +4 -0
  39. data/app/models/abstractor/abstractor_abstraction.rb +6 -0
  40. data/app/models/abstractor/abstractor_abstraction_group.rb +6 -0
  41. data/app/models/abstractor/abstractor_abstraction_group_member.rb +6 -0
  42. data/app/models/abstractor/abstractor_abstraction_schema.rb +6 -0
  43. data/app/models/abstractor/abstractor_abstraction_schema_object_value.rb +6 -0
  44. data/app/models/abstractor/abstractor_abstraction_schema_predicate_variant.rb +6 -0
  45. data/app/models/abstractor/abstractor_abstraction_schema_relation.rb +6 -0
  46. data/app/models/abstractor/abstractor_abstraction_source.rb +6 -0
  47. data/app/models/abstractor/abstractor_object_type.rb +6 -0
  48. data/app/models/abstractor/abstractor_object_value.rb +6 -0
  49. data/app/models/abstractor/abstractor_object_value_variant.rb +6 -0
  50. data/app/models/abstractor/abstractor_relation_type.rb +6 -0
  51. data/app/models/abstractor/abstractor_rule_type.rb +6 -0
  52. data/app/models/abstractor/abstractor_subject.rb +6 -0
  53. data/app/models/abstractor/abstractor_subject_group.rb +6 -0
  54. data/app/models/abstractor/abstractor_subject_group_member.rb +6 -0
  55. data/app/models/abstractor/abstractor_subject_relation.rb +6 -0
  56. data/app/models/abstractor/abstractor_suggestion.rb +6 -0
  57. data/app/models/abstractor/abstractor_suggestion_object_value.rb +6 -0
  58. data/app/models/abstractor/abstractor_suggestion_source.rb +6 -0
  59. data/app/models/abstractor/abstractor_suggestion_status.rb +6 -0
  60. data/app/views/abstractor/abstractor_abstraction_groups/_form.html.haml +10 -0
  61. data/app/views/abstractor/abstractor_abstraction_groups/edit.html.haml +1 -0
  62. data/app/views/abstractor/abstractor_abstractions/_fields.html.haml +63 -0
  63. data/app/views/abstractor/abstractor_abstractions/_list.html.haml +45 -0
  64. data/app/views/abstractor/abstractor_abstractions/edit.html.haml +53 -0
  65. data/app/views/abstractor/abstractor_abstractions/show.html.haml +1 -0
  66. data/app/views/abstractor/shared/_error_messages.html.haml +5 -0
  67. data/config/cucumber.yml +8 -0
  68. data/config/routes.rb +7 -0
  69. data/db/migrate/20131227205140_create_abstractor_object_types.rb +10 -0
  70. data/db/migrate/20131227205219_create_abstractor_object_values.rb +10 -0
  71. data/db/migrate/20131227205256_create_abstractor_object_value_variants.rb +11 -0
  72. data/db/migrate/20131227205320_create_abstractor_relation_types.rb +10 -0
  73. data/db/migrate/20131227205354_create_abstractor_rule_types.rb +11 -0
  74. data/db/migrate/20131227205432_create_abstractor_abstraction_schemas.rb +13 -0
  75. data/db/migrate/20131227205456_create_abstractor_abstraction_schema_object_values.rb +10 -0
  76. data/db/migrate/20131227205529_create_abstractor_abstraction_schema_predicate_variants.rb +11 -0
  77. data/db/migrate/20131227205610_create_abstractor_abstraction_schema_relations.rb +11 -0
  78. data/db/migrate/20131227205652_create_abstractor_subjects.rb +12 -0
  79. data/db/migrate/20131227205732_create_abstractor_subject_relations.rb +11 -0
  80. data/db/migrate/20131227205831_create_abstractor_abstraction_sources.rb +11 -0
  81. data/db/migrate/20131227210211_create_abstractor_subject_groups.rb +10 -0
  82. data/db/migrate/20131227210244_create_abstractor_subject_group_members.rb +11 -0
  83. data/db/migrate/20131227210350_create_abstractor_abstraction_groups.rb +12 -0
  84. data/db/migrate/20131227210353_create_abstractor_abstraction_group_members.rb +11 -0
  85. data/db/migrate/20131227211050_create_abstractor_suggestion_statuses.rb +10 -0
  86. data/db/migrate/20131227211303_create_abstractor_abstractions.rb +15 -0
  87. data/db/migrate/20131227213427_create_abstractor_suggestions.rb +14 -0
  88. data/db/migrate/20131228041944_create_abstractor_suggestion_object_values.rb +11 -0
  89. data/db/migrate/20131228041945_create_abstractor_suggestion_sources.rb +16 -0
  90. data/db/seeds.rb +0 -0
  91. data/lib/abstractor.rb +8 -0
  92. data/lib/abstractor/abstractable.rb +190 -0
  93. data/lib/abstractor/core_ext/string.rb +99 -0
  94. data/lib/abstractor/engine.rb +14 -0
  95. data/lib/abstractor/methods/controllers/abstractor_abstraction_groups_controller.rb +37 -0
  96. data/lib/abstractor/methods/controllers/abstractor_abstractions_controller.rb +42 -0
  97. data/lib/abstractor/methods/controllers/abstractor_suggestions_controller.rb +28 -0
  98. data/lib/abstractor/methods/models/abstractor_abstraction.rb +65 -0
  99. data/lib/abstractor/methods/models/abstractor_abstraction_group.rb +37 -0
  100. data/lib/abstractor/methods/models/abstractor_abstraction_group_member.rb +17 -0
  101. data/lib/abstractor/methods/models/abstractor_abstraction_schema.rb +27 -0
  102. data/lib/abstractor/methods/models/abstractor_abstraction_schema_object_value.rb +17 -0
  103. data/lib/abstractor/methods/models/abstractor_abstraction_schema_predicate_variant.rb +16 -0
  104. data/lib/abstractor/methods/models/abstractor_abstraction_schema_relation.rb +18 -0
  105. data/lib/abstractor/methods/models/abstractor_abstraction_source.rb +29 -0
  106. data/lib/abstractor/methods/models/abstractor_object_type.rb +16 -0
  107. data/lib/abstractor/methods/models/abstractor_object_value.rb +24 -0
  108. data/lib/abstractor/methods/models/abstractor_object_value_variant.rb +16 -0
  109. data/lib/abstractor/methods/models/abstractor_relation_type.rb +16 -0
  110. data/lib/abstractor/methods/models/abstractor_rule_type.rb +17 -0
  111. data/lib/abstractor/methods/models/abstractor_subject.rb +258 -0
  112. data/lib/abstractor/methods/models/abstractor_subject_group.rb +19 -0
  113. data/lib/abstractor/methods/models/abstractor_subject_group_member.rb +17 -0
  114. data/lib/abstractor/methods/models/abstractor_subject_relation.rb +18 -0
  115. data/lib/abstractor/methods/models/abstractor_suggestion.rb +88 -0
  116. data/lib/abstractor/methods/models/abstractor_suggestion_object_value.rb +17 -0
  117. data/lib/abstractor/methods/models/abstractor_suggestion_source.rb +17 -0
  118. data/lib/abstractor/methods/models/abstractor_suggestion_status.rb +29 -0
  119. data/lib/abstractor/methods/models/soft_delete.rb +35 -0
  120. data/lib/abstractor/negation_detection.rb +43 -0
  121. data/lib/abstractor/parser.rb +76 -0
  122. data/lib/abstractor/setup.rb +24 -0
  123. data/lib/abstractor/user_interface.rb +40 -0
  124. data/lib/abstractor/utility.rb +8 -0
  125. data/lib/abstractor/version.rb +3 -0
  126. data/lib/generators/abstractor/install/install_generator.rb +118 -0
  127. data/lib/generators/abstractor/install/templates/controllers/abstractor_abstraction_groups_controller.rb +16 -0
  128. data/lib/generators/abstractor/install/templates/controllers/abstractor_abstractions_controller.rb +24 -0
  129. data/lib/generators/abstractor/install/templates/controllers/abstractor_suggestions_controller.rb +12 -0
  130. data/lib/generators/abstractor/install/templates/dictionaries.yml +235 -0
  131. data/lib/generators/abstractor/install/templates/helpers/abstractions_helper.rb +9 -0
  132. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction.rb +9 -0
  133. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group.rb +9 -0
  134. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group_member.rb +9 -0
  135. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema.rb +9 -0
  136. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_object_value.rb +9 -0
  137. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_predicate_variant.rb +9 -0
  138. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_relation.rb +9 -0
  139. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_source.rb +9 -0
  140. data/lib/generators/abstractor/install/templates/models/abstractor_object_type.rb +9 -0
  141. data/lib/generators/abstractor/install/templates/models/abstractor_object_value.rb +9 -0
  142. data/lib/generators/abstractor/install/templates/models/abstractor_object_value_variant.rb +9 -0
  143. data/lib/generators/abstractor/install/templates/models/abstractor_relation_type.rb +9 -0
  144. data/lib/generators/abstractor/install/templates/models/abstractor_rule_type.rb +9 -0
  145. data/lib/generators/abstractor/install/templates/models/abstractor_subject.rb +9 -0
  146. data/lib/generators/abstractor/install/templates/models/abstractor_subject_group.rb +9 -0
  147. data/lib/generators/abstractor/install/templates/models/abstractor_subject_group_member.rb +9 -0
  148. data/lib/generators/abstractor/install/templates/models/abstractor_subject_relation.rb +9 -0
  149. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion.rb +9 -0
  150. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_object_value.rb +9 -0
  151. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_source.rb +9 -0
  152. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_status.rb +9 -0
  153. data/lib/lingscope/build.xml +74 -0
  154. data/lib/lingscope/build/built-jar.properties +4 -0
  155. data/lib/lingscope/build/classes/lingscope/algorithms/AbnerTokenizer.class +0 -0
  156. data/lib/lingscope/build/classes/lingscope/algorithms/AnnotationComparer.class +0 -0
  157. data/lib/lingscope/build/classes/lingscope/algorithms/Annotator.class +0 -0
  158. data/lib/lingscope/build/classes/lingscope/algorithms/BaselineAnnotator.class +0 -0
  159. data/lib/lingscope/build/classes/lingscope/algorithms/BaselineCueAnnotator.class +0 -0
  160. data/lib/lingscope/build/classes/lingscope/algorithms/BaselineScopeAnnotator.class +0 -0
  161. data/lib/lingscope/build/classes/lingscope/algorithms/CrfAnnotator.class +0 -0
  162. data/lib/lingscope/build/classes/lingscope/algorithms/NegexAnnotator.class +0 -0
  163. data/lib/lingscope/build/classes/lingscope/algorithms/NegexCueAnnotator.class +0 -0
  164. data/lib/lingscope/build/classes/lingscope/algorithms/NegexScopeAnnotator.class +0 -0
  165. data/lib/lingscope/build/classes/lingscope/algorithms/PosTagger.class +0 -0
  166. data/lib/lingscope/build/classes/lingscope/algorithms/negex/GenNegEx.class +0 -0
  167. data/lib/lingscope/build/classes/lingscope/algorithms/negex/Sorter.class +0 -0
  168. data/lib/lingscope/build/classes/lingscope/drivers/AnnotatedFilesMerger.class +0 -0
  169. data/lib/lingscope/build/classes/lingscope/drivers/AnnotationComparerDriver.class +0 -0
  170. data/lib/lingscope/build/classes/lingscope/drivers/BaselineDriver.class +0 -0
  171. data/lib/lingscope/build/classes/lingscope/drivers/CrfDriver.class +0 -0
  172. data/lib/lingscope/build/classes/lingscope/drivers/CueAndPosFilesMerger.class +0 -0
  173. data/lib/lingscope/build/classes/lingscope/drivers/ModelTrainer.class +0 -0
  174. data/lib/lingscope/build/classes/lingscope/drivers/NegexDriver.class +0 -0
  175. data/lib/lingscope/build/classes/lingscope/drivers/PosTaggerDriver.class +0 -0
  176. data/lib/lingscope/build/classes/lingscope/drivers/SentencePosTagger.class +0 -0
  177. data/lib/lingscope/build/classes/lingscope/drivers/SentenceTagger.class +0 -0
  178. data/lib/lingscope/build/classes/lingscope/io/AnnotatedSentencesIO.class +0 -0
  179. data/lib/lingscope/build/classes/lingscope/structures/AnnotatedSentence.class +0 -0
  180. data/lib/lingscope/dist/README.TXT +32 -0
  181. data/lib/lingscope/dist/javadoc/allclasses-frame.html +80 -0
  182. data/lib/lingscope/dist/javadoc/allclasses-noframe.html +80 -0
  183. data/lib/lingscope/dist/javadoc/constant-values.html +199 -0
  184. data/lib/lingscope/dist/javadoc/deprecated-list.html +147 -0
  185. data/lib/lingscope/dist/javadoc/help-doc.html +224 -0
  186. data/lib/lingscope/dist/javadoc/index-files/index-1.html +188 -0
  187. data/lib/lingscope/dist/javadoc/index-files/index-10.html +149 -0
  188. data/lib/lingscope/dist/javadoc/index-files/index-11.html +158 -0
  189. data/lib/lingscope/dist/javadoc/index-files/index-12.html +157 -0
  190. data/lib/lingscope/dist/javadoc/index-files/index-13.html +177 -0
  191. data/lib/lingscope/dist/javadoc/index-files/index-14.html +155 -0
  192. data/lib/lingscope/dist/javadoc/index-files/index-15.html +152 -0
  193. data/lib/lingscope/dist/javadoc/index-files/index-16.html +146 -0
  194. data/lib/lingscope/dist/javadoc/index-files/index-2.html +158 -0
  195. data/lib/lingscope/dist/javadoc/index-files/index-3.html +165 -0
  196. data/lib/lingscope/dist/javadoc/index-files/index-4.html +146 -0
  197. data/lib/lingscope/dist/javadoc/index-files/index-5.html +219 -0
  198. data/lib/lingscope/dist/javadoc/index-files/index-6.html +149 -0
  199. data/lib/lingscope/dist/javadoc/index-files/index-7.html +155 -0
  200. data/lib/lingscope/dist/javadoc/index-files/index-8.html +185 -0
  201. data/lib/lingscope/dist/javadoc/index-files/index-9.html +164 -0
  202. data/lib/lingscope/dist/javadoc/index.html +74 -0
  203. data/lib/lingscope/dist/javadoc/lingscope/algorithms/AbnerTokenizer.html +280 -0
  204. data/lib/lingscope/dist/javadoc/lingscope/algorithms/AnnotationComparer.html +526 -0
  205. data/lib/lingscope/dist/javadoc/lingscope/algorithms/Annotator.html +401 -0
  206. data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineAnnotator.html +375 -0
  207. data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineCueAnnotator.html +309 -0
  208. data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineScopeAnnotator.html +340 -0
  209. data/lib/lingscope/dist/javadoc/lingscope/algorithms/CrfAnnotator.html +340 -0
  210. data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexAnnotator.html +364 -0
  211. data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexCueAnnotator.html +309 -0
  212. data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexScopeAnnotator.html +309 -0
  213. data/lib/lingscope/dist/javadoc/lingscope/algorithms/PosTagger.html +268 -0
  214. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AbnerTokenizer.html +145 -0
  215. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AnnotationComparer.html +145 -0
  216. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/Annotator.html +299 -0
  217. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineAnnotator.html +189 -0
  218. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineCueAnnotator.html +145 -0
  219. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineScopeAnnotator.html +145 -0
  220. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/CrfAnnotator.html +145 -0
  221. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexAnnotator.html +189 -0
  222. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexCueAnnotator.html +145 -0
  223. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexScopeAnnotator.html +145 -0
  224. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/PosTagger.html +145 -0
  225. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/GenNegEx.html +369 -0
  226. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/Sorter.html +253 -0
  227. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/GenNegEx.html +181 -0
  228. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/Sorter.html +145 -0
  229. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-frame.html +35 -0
  230. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-summary.html +165 -0
  231. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-tree.html +154 -0
  232. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-use.html +174 -0
  233. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-frame.html +53 -0
  234. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-summary.html +198 -0
  235. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-tree.html +160 -0
  236. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-use.html +202 -0
  237. data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotatedFilesMerger.html +284 -0
  238. data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotationComparerDriver.html +258 -0
  239. data/lib/lingscope/dist/javadoc/lingscope/drivers/BaselineDriver.html +260 -0
  240. data/lib/lingscope/dist/javadoc/lingscope/drivers/CrfDriver.html +262 -0
  241. data/lib/lingscope/dist/javadoc/lingscope/drivers/CueAndPosFilesMerger.html +310 -0
  242. data/lib/lingscope/dist/javadoc/lingscope/drivers/ModelTrainer.html +281 -0
  243. data/lib/lingscope/dist/javadoc/lingscope/drivers/NegexDriver.html +262 -0
  244. data/lib/lingscope/dist/javadoc/lingscope/drivers/PosTaggerDriver.html +289 -0
  245. data/lib/lingscope/dist/javadoc/lingscope/drivers/SentencePosTagger.html +313 -0
  246. data/lib/lingscope/dist/javadoc/lingscope/drivers/SentenceTagger.html +512 -0
  247. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotatedFilesMerger.html +145 -0
  248. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotationComparerDriver.html +145 -0
  249. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/BaselineDriver.html +145 -0
  250. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CrfDriver.html +145 -0
  251. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CueAndPosFilesMerger.html +145 -0
  252. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/ModelTrainer.html +145 -0
  253. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/NegexDriver.html +145 -0
  254. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/PosTaggerDriver.html +145 -0
  255. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentencePosTagger.html +145 -0
  256. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentenceTagger.html +145 -0
  257. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-frame.html +51 -0
  258. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-summary.html +195 -0
  259. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-tree.html +154 -0
  260. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-use.html +145 -0
  261. data/lib/lingscope/dist/javadoc/lingscope/io/AnnotatedSentencesIO.html +285 -0
  262. data/lib/lingscope/dist/javadoc/lingscope/io/class-use/AnnotatedSentencesIO.html +145 -0
  263. data/lib/lingscope/dist/javadoc/lingscope/io/package-frame.html +33 -0
  264. data/lib/lingscope/dist/javadoc/lingscope/io/package-summary.html +158 -0
  265. data/lib/lingscope/dist/javadoc/lingscope/io/package-tree.html +154 -0
  266. data/lib/lingscope/dist/javadoc/lingscope/io/package-use.html +145 -0
  267. data/lib/lingscope/dist/javadoc/lingscope/structures/AnnotatedSentence.html +425 -0
  268. data/lib/lingscope/dist/javadoc/lingscope/structures/class-use/AnnotatedSentence.html +423 -0
  269. data/lib/lingscope/dist/javadoc/lingscope/structures/package-frame.html +33 -0
  270. data/lib/lingscope/dist/javadoc/lingscope/structures/package-summary.html +158 -0
  271. data/lib/lingscope/dist/javadoc/lingscope/structures/package-tree.html +154 -0
  272. data/lib/lingscope/dist/javadoc/lingscope/structures/package-use.html +209 -0
  273. data/lib/lingscope/dist/javadoc/overview-frame.html +51 -0
  274. data/lib/lingscope/dist/javadoc/overview-summary.html +168 -0
  275. data/lib/lingscope/dist/javadoc/overview-tree.html +159 -0
  276. data/lib/lingscope/dist/javadoc/package-list +5 -0
  277. data/lib/lingscope/dist/javadoc/resources/inherit.gif +0 -0
  278. data/lib/lingscope/dist/javadoc/stylesheet.css +29 -0
  279. data/lib/lingscope/dist/lib/abner.jar +0 -0
  280. data/lib/lingscope/dist/lib/commons-lang-2.4.jar +0 -0
  281. data/lib/lingscope/dist/lib/java_utils.jar +0 -0
  282. data/lib/lingscope/dist/lib/sptoolkit.jar +0 -0
  283. data/lib/lingscope/dist/lib/stanford-postagger.jar +0 -0
  284. data/lib/lingscope/dist/lingscope.jar +0 -0
  285. data/lib/lingscope/lingscope.zip +0 -0
  286. data/lib/lingscope/manifest.mf +3 -0
  287. data/lib/lingscope/nbproject/build-impl.xml +1338 -0
  288. data/lib/lingscope/nbproject/genfiles.properties +8 -0
  289. data/lib/lingscope/nbproject/private/config.properties +0 -0
  290. data/lib/lingscope/nbproject/private/private.properties +10 -0
  291. data/lib/lingscope/nbproject/private/private.xml +5 -0
  292. data/lib/lingscope/nbproject/project.properties +85 -0
  293. data/lib/lingscope/nbproject/project.xml +15 -0
  294. data/lib/lingscope/negation_models.zip +0 -0
  295. data/lib/lingscope/negation_models/baseline_cue_all_both.model +49 -0
  296. data/lib/lingscope/negation_models/baseline_cue_all_clinical.model +19 -0
  297. data/lib/lingscope/negation_models/crf_cue_all_both.model +0 -0
  298. data/lib/lingscope/negation_models/crf_cue_all_clinical.model +0 -0
  299. data/lib/lingscope/negation_models/crf_scope_cue_crf_all_both.model +0 -0
  300. data/lib/lingscope/negation_models/crf_scope_cue_crf_all_clinical.model +0 -0
  301. data/lib/lingscope/negation_models/crf_scope_cue_regex_all_both.model +0 -0
  302. data/lib/lingscope/negation_models/crf_scope_cue_regex_all_clinical.model +0 -0
  303. data/lib/lingscope/negation_models/crf_scope_words_all_both.model +0 -0
  304. data/lib/lingscope/negation_models/crf_scope_words_all_clinical.model +0 -0
  305. data/lib/lingscope/negation_models/crf_scope_words_crf_all_both.model +0 -0
  306. data/lib/lingscope/negation_models/crf_scope_words_crf_all_clinical.model +0 -0
  307. data/lib/lingscope/negation_models/crf_scope_words_regex_all_both.model +0 -0
  308. data/lib/lingscope/negation_models/crf_scope_words_regex_all_clinical.model +0 -0
  309. data/lib/lingscope/src/lingscope/algorithms/AbnerTokenizer.java +58 -0
  310. data/lib/lingscope/src/lingscope/algorithms/AnnotationComparer.java +207 -0
  311. data/lib/lingscope/src/lingscope/algorithms/Annotator.java +51 -0
  312. data/lib/lingscope/src/lingscope/algorithms/BaselineAnnotator.java +80 -0
  313. data/lib/lingscope/src/lingscope/algorithms/BaselineCueAnnotator.java +84 -0
  314. data/lib/lingscope/src/lingscope/algorithms/BaselineScopeAnnotator.java +101 -0
  315. data/lib/lingscope/src/lingscope/algorithms/CrfAnnotator.java +45 -0
  316. data/lib/lingscope/src/lingscope/algorithms/NegexAnnotator.java +52 -0
  317. data/lib/lingscope/src/lingscope/algorithms/NegexCueAnnotator.java +26 -0
  318. data/lib/lingscope/src/lingscope/algorithms/NegexScopeAnnotator.java +26 -0
  319. data/lib/lingscope/src/lingscope/algorithms/PosTagger.java +54 -0
  320. data/lib/lingscope/src/lingscope/algorithms/negex/GenNegEx.java +530 -0
  321. data/lib/lingscope/src/lingscope/algorithms/negex/Sorter.java +36 -0
  322. data/lib/lingscope/src/lingscope/drivers/AnnotatedFilesMerger.java +61 -0
  323. data/lib/lingscope/src/lingscope/drivers/AnnotationComparerDriver.java +22 -0
  324. data/lib/lingscope/src/lingscope/drivers/BaselineDriver.java +45 -0
  325. data/lib/lingscope/src/lingscope/drivers/CrfDriver.java +31 -0
  326. data/lib/lingscope/src/lingscope/drivers/CueAndPosFilesMerger.java +86 -0
  327. data/lib/lingscope/src/lingscope/drivers/ModelTrainer.java +39 -0
  328. data/lib/lingscope/src/lingscope/drivers/NegexDriver.java +32 -0
  329. data/lib/lingscope/src/lingscope/drivers/PosTaggerDriver.java +62 -0
  330. data/lib/lingscope/src/lingscope/drivers/SentencePosTagger.java +89 -0
  331. data/lib/lingscope/src/lingscope/drivers/SentenceTagger.java +158 -0
  332. data/lib/lingscope/src/lingscope/io/AnnotatedSentencesIO.java +53 -0
  333. data/lib/lingscope/src/lingscope/structures/AnnotatedSentence.java +105 -0
  334. data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.csv +1 -0
  335. data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.txt +2273 -0
  336. data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.csv +1 -0
  337. data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.txt +1342 -0
  338. data/lib/setup/data/ICD-O-3_CSV-metadata/icd-o3 readme.txt +21 -0
  339. data/lib/setup/data/custom_site_synonyms.csv +1 -0
  340. data/lib/setup/data/diagnosis_categorizations.csv +1 -0
  341. data/lib/setup/data/high_level_primary_cns_diagnosis_categorizations.yml +597 -0
  342. data/lib/setup/data/icdo3_sites_with_laterality.txt +11 -0
  343. data/lib/setup/data/site_site_categories.txt +28 -0
  344. data/lib/tasks/abstractor_tasks.rake +22 -0
  345. data/lib/tasks/cucumber.rake +65 -0
  346. metadata +754 -0
@@ -0,0 +1,36 @@
1
+ package lingscope.algorithms.negex;
2
+
3
+ import java.util.*;
4
+
5
+ // Utility class to sort the negation rules by length in descending order.
6
+ // Rules need to be matched by longest first because there is overlap between the
7
+ // RegEx of the rules.
8
+ //
9
+ // Author: Imre Solti
10
+ // solti@u.washington.edu
11
+ // Date: 10/20/2008
12
public class Sorter {

    /**
     * Sorts the given negation rules in place by the length of their trimmed
     * text, longest first, so that longer rules are tried before shorter ones.
     * Rules whose trimmed lengths are equal keep their original relative
     * order (the underlying sort is stable).
     *
     * Failures while sorting (for example a null list, a null rule, or a list
     * that does not support modification) are reported on standard output and
     * the list is returned as it stands, matching the best-effort behaviour
     * callers have always had.
     *
     * @param unsortedRules the rules to sort; this list itself is reordered
     * @return the same list instance that was passed in
     */
    public List<String> sortRules(List<String> unsortedRules) {
        try {
            Collections.sort(unsortedRules, new Comparator<String>() {
                @Override
                public int compare(String first, String second) {
                    int firstLength = first.trim().length();
                    int secondLength = second.trim().length();
                    // Descending order by length of the trimmed string.
                    if (firstLength > secondLength) {
                        return -1;
                    }
                    if (firstLength < secondLength) {
                        return 1;
                    }
                    return 0;
                }
            });
        } catch (RuntimeException e) {
            // Best effort: report the problem and hand back whatever we have.
            System.out.println(e);
        }
        return unsortedRules;
    }
}
@@ -0,0 +1,61 @@
1
+ package lingscope.drivers;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.List;
5
+ import lingscope.io.AnnotatedSentencesIO;
6
+ import lingscope.structures.AnnotatedSentence;
7
+
8
+ /**
9
+ * Merges two annotated files. Useful to merge a words scope file with a POS cue
10
+ * file
11
+ * @author shashank
12
+ */
13
+ public class AnnotatedFilesMerger {
14
+
15
+ /**
16
+ * Merges the given wordsSentence and the given tagsSentence
17
+ * @param wordsSentence
18
+ * @param tagsSentence
19
+ * @return
20
+ */
21
+ public static AnnotatedSentence merge(AnnotatedSentence wordsSentence, AnnotatedSentence tagsSentence) {
22
+ List<String> words = wordsSentence.getWords();
23
+ List<String> tags = tagsSentence.getTags();
24
+ int numTokens = words.size();
25
+ if (tags.size() != numTokens) {
26
+ System.err.println("Skipping non-equal length sentences");
27
+ System.err.println("\tSentence 1: " + wordsSentence.getRawText());
28
+ System.err.println("\tSentence 2: " + tagsSentence.getRawText());
29
+ return null;
30
+ }
31
+ StringBuilder mergedSentence = new StringBuilder();
32
+ for (int j = 0; j < numTokens; ++j) {
33
+ mergedSentence.append(" ").append(words.get(j)).append("|").append(tags.get(j));
34
+ }
35
+ return new AnnotatedSentence(mergedSentence.substring(1));
36
+ }
37
+
38
+ /**
39
+ *
40
+ * @param args
41
+ * 0 - file 1: the file from which words will be taken
42
+ * 1 - file 2: the file from which tags will be taken
43
+ * 2 - output file path
44
+ */
45
+ public static void main(String[] args) {
46
+ List<AnnotatedSentence> wordsSentences = AnnotatedSentencesIO.read(args[0]);
47
+ List<AnnotatedSentence> tagsSentences = AnnotatedSentencesIO.read(args[1]);
48
+ int numSentences = tagsSentences.size();
49
+ List<AnnotatedSentence> mergedSentences = new ArrayList<AnnotatedSentence>(numSentences);
50
+ for (int i = 0; i < numSentences; ++i) {
51
+ AnnotatedSentence wordsSentence = wordsSentences.get(i);
52
+ AnnotatedSentence tagsSentence = tagsSentences.get(i);
53
+ AnnotatedSentence mergedSentence = merge(wordsSentence, tagsSentence);
54
+ if (mergedSentence == null) {
55
+ continue;
56
+ }
57
+ mergedSentences.add(mergedSentence);
58
+ }
59
+ AnnotatedSentencesIO.write(args[2], mergedSentences);
60
+ }
61
+ }
@@ -0,0 +1,22 @@
1
+ package lingscope.drivers;
2
+
3
+ import lingscope.algorithms.AnnotationComparer;
4
+
5
+ /**
6
+ * Compares annotations between a gold and test file
7
+ * @author shashank
8
+ */
9
+ public class AnnotationComparerDriver {
10
+
11
+ /**
12
+ *
13
+ * @param args
14
+ * 0 - gold file
15
+ * 1 - test file
16
+ */
17
+ public static void main(String[] args) {
18
+ AnnotationComparer comparer = new AnnotationComparer(10);
19
+ comparer.compareAnnotationFiles(args[0], args[1]);
20
+ comparer.printStats();
21
+ }
22
+ }
@@ -0,0 +1,45 @@
1
+ package lingscope.drivers;
2
+
3
+ import java.util.List;
4
+ import lingscope.algorithms.Annotator;
5
+ import lingscope.algorithms.BaselineScopeAnnotator;
6
+ import lingscope.io.AnnotatedSentencesIO;
7
+ import lingscope.structures.AnnotatedSentence;
8
+
9
+ /**
10
+ *
11
+ * @author shashank
12
+ */
13
+ public class BaselineDriver {
14
+
15
+ /**
16
+ *
17
+ * @param args
18
+ * 0 - Annotator type ("cue" or "scope")
19
+ * 1 - Serialized annotator file
20
+ * 2 - Input file
21
+ * 3 - Output file
22
+ * 4 - if annotator type is "scope", then should scope be limited by commas
23
+ * 5 - if annotator type is "scope", then should scope be limited by periods
24
+ * 6 - (boolean) does the input file contain annotated sentence (true) or
25
+ * not (false)
26
+ */
27
+ public static void main(String[] args) {
28
+ Annotator annotator;
29
+
30
+ if (args[0].equalsIgnoreCase("scope")) {
31
+ annotator = new BaselineScopeAnnotator(SentenceTagger.SCOPE_START,
32
+ SentenceTagger.SCOPE_INTER, SentenceTagger.OTHER,
33
+ Boolean.parseBoolean(args[4]), Boolean.parseBoolean(args[5]));
34
+ } else {
35
+ annotator = SentenceTagger.getAnnotator("baseline", args[0]);
36
+ }
37
+ annotator.loadAnnotator(args[1]);
38
+
39
+ boolean isAnnotated = Boolean.parseBoolean(args[6]);
40
+ List<String> inputSentences = SentenceTagger.getStringList(args[2], isAnnotated);
41
+ List<AnnotatedSentence> outputSentences = SentenceTagger.annotateSentences(annotator, inputSentences, isAnnotated);
42
+ AnnotatedSentencesIO.write(args[3], outputSentences);
43
+
44
+ }
45
+ }
@@ -0,0 +1,31 @@
1
+ package lingscope.drivers;
2
+
3
+ import java.util.List;
4
+ import lingscope.algorithms.Annotator;
5
+ import lingscope.io.AnnotatedSentencesIO;
6
+ import lingscope.structures.AnnotatedSentence;
7
+
8
+ /**
9
+ * Driver to mark scope or cue in a file
10
+ * @author shashank
11
+ */
12
+ public class CrfDriver {
13
+ /**
14
+ *
15
+ * @param args
16
+ * 0 - Annotator type ("cue" or "scope")
17
+ * 1 - Serialized annotator file
18
+ * 2 - Input file
19
+ * 3 - Output file
20
+ * 4 - (boolean) does the input file contain annotated sentence (true) or
21
+ * not (false)
22
+ */
23
+ public static void main(String[] args) {
24
+ Annotator annotator = SentenceTagger.getAnnotator("crf", args[0]);
25
+ annotator.loadAnnotator(args[1]);
26
+ boolean isAnnotated = Boolean.parseBoolean(args[4]);
27
+ List<String> inputSentences = SentenceTagger.getStringList(args[2], isAnnotated);
28
+ List<AnnotatedSentence> outputSentences = SentenceTagger.annotateSentences(annotator, inputSentences, isAnnotated);
29
+ AnnotatedSentencesIO.write(args[3], outputSentences);
30
+ }
31
+ }
@@ -0,0 +1,86 @@
1
+ package lingscope.drivers;
2
+
3
+ import generalutils.FileOperations;
4
+ import java.util.ArrayList;
5
+ import java.util.List;
6
+ import java.util.logging.Level;
7
+ import java.util.logging.Logger;
8
+ import lingscope.io.AnnotatedSentencesIO;
9
+ import lingscope.structures.AnnotatedSentence;
10
+
11
+ /**
12
+ * Merges two files, one containing pos tags and the other containing
13
+ * annotated cues
14
+ * @author shashank
15
+ */
16
+ public class CueAndPosFilesMerger {
17
+
18
+ public static AnnotatedSentence merge(AnnotatedSentence cueSentence, String posSentence, boolean replaceTags) {
19
+ String[] posTags = posSentence.split("\\s+");
20
+ List<String> crfTags = cueSentence.getTags();
21
+ List<String> words = cueSentence.getWords();
22
+ List<Boolean> tagStatusList = cueSentence.getIsAnnotatedTags();
23
+ StringBuilder mergedSentence = new StringBuilder();
24
+ int numWords = posTags.length;
25
+ for (int j = 0; j < numWords; ++j) {
26
+ mergedSentence.append(" ");
27
+ String posTag = posTags[j];
28
+ String word = words.get(j);
29
+ String crfTag = crfTags.get(j);
30
+ boolean tagStatus = tagStatusList.get(j);
31
+ if (tagStatus) {
32
+ if (replaceTags) {
33
+ mergedSentence.append("CUE|");
34
+ } else {
35
+ mergedSentence.append(word).append("|");
36
+ }
37
+ } else {
38
+ mergedSentence.append(posTag).append("|");
39
+ }
40
+ mergedSentence.append(crfTag);
41
+ }
42
+
43
+ AnnotatedSentence mergedAnnotatedSentence = new AnnotatedSentence(mergedSentence.substring(1));
44
+ return mergedAnnotatedSentence;
45
+ }
46
+
47
+ /**
48
+ * Merges the cueSentences and posSentences
49
+ * @param cueSentences
50
+ * @param posSentences
51
+ * @param replaceTags
52
+ * @return
53
+ */
54
+ public static List<AnnotatedSentence> merge(List<AnnotatedSentence> cueSentences, List<String> posSentences, boolean replaceTags) {
55
+ List<AnnotatedSentence> mergedSentences = new ArrayList<AnnotatedSentence>(cueSentences.size());
56
+ int numSentences = posSentences.size();
57
+ for (int i = 0; i < numSentences; ++i) {
58
+ AnnotatedSentence cueSentence = cueSentences.get(i);
59
+ String posSentence = posSentences.get(i);
60
+ AnnotatedSentence mergedAnnotatedSentence = merge(cueSentence, posSentence, replaceTags);
61
+ mergedSentences.add(mergedAnnotatedSentence);
62
+ }
63
+ return mergedSentences;
64
+ }
65
+
66
+ /**
67
+ *
68
+ * @param args
69
+ * 0 - cue input file
70
+ * 1 - pos input file
71
+ * 2 - replace cue with custom tag 'CUE' (true) or leave it as it is (false)
72
+ * 3 - merged file output path
73
+ */
74
+ public static void main(String[] args) {
75
+ boolean replaceTags = Boolean.parseBoolean(args[2]);
76
+ List<AnnotatedSentence> cueSentences = AnnotatedSentencesIO.read(args[0]);
77
+ List<String> posSentences = null;
78
+ try {
79
+ posSentences = FileOperations.readFile(args[1]);
80
+ } catch (Exception ex) {
81
+ Logger.getLogger(CueAndPosFilesMerger.class.getName()).log(Level.SEVERE, null, ex);
82
+ System.exit(1);
83
+ }
84
+ AnnotatedSentencesIO.write(args[3], merge(cueSentences, posSentences, replaceTags));
85
+ }
86
+ }
@@ -0,0 +1,39 @@
1
+ package lingscope.drivers;
2
+
3
+ import lingscope.algorithms.Annotator;
4
+
5
+ /**
6
+ * Driver to train a model file. The training data will have to be provided.
7
+ * @author shashank
8
+ */
9
+ public class ModelTrainer {
10
+
11
+ /**
12
+ * Prints the usage for the model trainer
13
+ */
14
+ public static void usage() {
15
+ System.out.println("Usage:\njava lingscope.drivers.ModelTrainer (cue|scope) (crf|baseline|negex) training_data_file file_where_model_will_be_saved");
16
+ }
17
+
18
+ /**
19
+ *
20
+ * @param args
21
+ * 0 - Annotator type ("cue" or "scope")
22
+ * 1 - Model type ("crf", "baseline" or "negex")
23
+ * 2 - File from which training data will be read
24
+ * 2 - File where the model will be saved
25
+ */
26
+ public static void main(String[] args) {
27
+ if (args.length != 4) {
28
+ usage();
29
+ System.exit(0);
30
+ }
31
+ Annotator annotator = SentenceTagger.getAnnotator(args[1], args[0]);
32
+ if (annotator == null) {
33
+ usage();
34
+ System.exit(1);
35
+ }
36
+ annotator.serializeAnnotator(args[2], args[3]);
37
+ }
38
+
39
+ }
@@ -0,0 +1,32 @@
1
+ package lingscope.drivers;
2
+
3
+ import java.util.List;
4
+ import lingscope.algorithms.Annotator;
5
+ import lingscope.io.AnnotatedSentencesIO;
6
+ import lingscope.structures.AnnotatedSentence;
7
+
8
+ /**
9
+ * Runs the annotators
10
+ * @author shashank
11
+ */
12
+ public class NegexDriver {
13
+
14
+ /**
15
+ *
16
+ * @param args
17
+ * 0 - Annotator type ("cue" or "scope")
18
+ * 1 - Serialized annotator file
19
+ * 2 - Input file
20
+ * 3 - Output file
21
+ * 4 - (boolean) does the input file contain annotated sentence (true) or
22
+ * not (false)
23
+ */
24
+ public static void main(String[] args) {
25
+ Annotator annotator = SentenceTagger.getAnnotator("negex", args[0]);
26
+ annotator.loadAnnotator(args[1]);
27
+ boolean isAnnotated = Boolean.parseBoolean(args[4]);
28
+ List<String> inputSentences = SentenceTagger.getStringList(args[2], isAnnotated);
29
+ List<AnnotatedSentence> outputSentences = SentenceTagger.annotateSentences(annotator, inputSentences, isAnnotated);
30
+ AnnotatedSentencesIO.write(args[3], outputSentences);
31
+ }
32
+ }
@@ -0,0 +1,62 @@
1
+ package lingscope.drivers;
2
+
3
+ import generalutils.FileOperations;
4
+ import java.util.ArrayList;
5
+ import java.util.List;
6
+ import java.util.logging.Level;
7
+ import java.util.logging.Logger;
8
+ import lingscope.algorithms.PosTagger;
9
+
10
+ /**
11
+ * Driver for the Part of Speech tagger. Use this to tag all sentences in a
12
+ * given file with part of speech tags
13
+ * @author shashank
14
+ */
15
+ public class PosTaggerDriver {
16
+
17
+ private static PosTagger posTagger = null;
18
+
19
+ /**
20
+ * Gets the equivalent POS sentence for the given sentenceToTag
21
+ * @param grammerFile file containing POS model
22
+ * @param sentenceToTag sentence for which POS tags will be returned
23
+ * @param is
24
+ * @return a string where the words from sentenceToTag are replaced with
25
+ * corresponding part of speech tags
26
+ */
27
+ public static String getTaggedSentence(String grammerFile, String sentenceToTag, boolean isTokenized) {
28
+ if (posTagger == null) {
29
+ posTagger = new PosTagger(grammerFile);
30
+ }
31
+ List<String> posTags = posTagger.replaceWordsWithPos(sentenceToTag, isTokenized);
32
+ StringBuilder posSentence = new StringBuilder();
33
+ for (String posTag : posTags) {
34
+ posSentence.append(" ").append(posTag);
35
+ }
36
+ return posSentence.substring(1);
37
+ }
38
+
39
+ /**
40
+ *
41
+ * @param args
42
+ * 0 - file containing the part of speech model
43
+ * 1 - input file
44
+ * 2 - output file
45
+ * 3 - (boolean) does the input file contain annotated sentence (true) or
46
+ * not (false)
47
+ */
48
+ public static void main(String[] args) {
49
+ String grammarFile = args[0];
50
+ List<String> inputSentences = SentenceTagger.getStringList(args[1], Boolean.parseBoolean(args[3]));
51
+ List<String> outputSentences = new ArrayList<String>(inputSentences.size());
52
+ for (String inputSentence : inputSentences) {
53
+ String outputSentence = getTaggedSentence(grammarFile, inputSentence, Boolean.parseBoolean(args[3]));
54
+ outputSentences.add(outputSentence);
55
+ }
56
+ try {
57
+ FileOperations.writeFile(args[2], outputSentences);
58
+ } catch (Exception ex) {
59
+ Logger.getLogger(PosTaggerDriver.class.getName()).log(Level.SEVERE, null, ex);
60
+ }
61
+ }
62
+ }
@@ -0,0 +1,89 @@
1
+ package lingscope.drivers;
2
+
3
+ import generalutils.FileOperations;
4
+ import java.util.List;
5
+ import lingscope.algorithms.Annotator;
6
+ import lingscope.structures.AnnotatedSentence;
7
+
8
+ /**
9
+ * Use this sentence tagger when using a model that tags POS
10
+ * @author shashank
11
+ */
12
+ public class SentencePosTagger {
13
+
14
+ public static void usage() {
15
+ System.out.println("java lingscope.drivers.SentencePosTagger cue_tagging_model "
16
+ + "cue_tagger_type(baseline|crf|negex) "
17
+ + "replace_cue_with_custom_tag(true|false) scope_tagging_model "
18
+ + "pos_model_file sentence_to_tag");
19
+ System.out.println("\tSaved model for negation can be obtained from http://negscope.askhermes.org/");
20
+ System.out.println("\tSaved model for speculation can be obtained from http://hedgescope.askhermes.org/");
21
+ System.out.println("\tSaved model for NegEx can be obtained from http://code.google.com/p/negex/downloads/list");
22
+ System.out.println("\tSaved pos_model_file can be obtained from http://hedgescope.askhermes.org/");
23
+ }
24
+
25
+ /**
26
+ *
27
+ * @param args
28
+ * 0 - cue tagging model
29
+ * 1 - cue tagger type (baseline, crf or negex)
30
+ * 2 - replace cue words with custom tag CUE (true) or not (false)
31
+ * 3 - crf pos-based scope tagging model
32
+ * 4 - POS model file
33
+ * 5 - sentence to tag
34
+ */
35
+ public static void main(String[] args) {
36
+ if (args[0].equalsIgnoreCase("help")) {
37
+ usage();
38
+ System.exit(0);
39
+ } else if (args.length < 6) {
40
+ usage();
41
+ System.exit(1);
42
+ }
43
+ Annotator cueAnnotator = SentenceTagger.getAnnotator(args[1], "cue");
44
+ cueAnnotator.loadAnnotator(args[0]);
45
+ Annotator scopeAnnotator = SentenceTagger.getAnnotator("crf", "scope");
46
+ scopeAnnotator.loadAnnotator(args[3]);
47
+ String sentence = args[5];
48
+ String grammarFile = args[4];
49
+
50
+ if ("file".equalsIgnoreCase(sentence)) {
51
+ String sentencesFile = args[6];
52
+ try {
53
+ List<String> sentences = FileOperations.readFile(sentencesFile);
54
+ for (String sentenceText : sentences) {
55
+ tagSentence(sentenceText, grammarFile,
56
+ Boolean.parseBoolean(args[2]), cueAnnotator,
57
+ scopeAnnotator);
58
+ }
59
+ } catch (Exception ex) {
60
+ ex.printStackTrace(System.err);
61
+ }
62
+ } else {
63
+ tagSentence(sentence, grammarFile, Boolean.parseBoolean(args[2]),
64
+ cueAnnotator, scopeAnnotator);
65
+ }
66
+ }
67
+
68
+ /**
69
+ * Tags the given sentence
70
+ * @param sentence the text of the sentence to tag
71
+ * @param grammarFile path to the Stanford part of speech model file
72
+ * @param replaceCueWords if true, cue words will be replaced with custom
73
+ * tag 'CUE'
74
+ * @param cueAnnotator the {@link Annotator} object to identify negation or
75
+ * hedge cue in the sentence
76
+ * @param scopeAnnotator the {@link Annotator} object to identify negation
77
+ * or hedge scope in the sentence
78
+ */
79
+ public static void tagSentence(String sentence, String grammarFile,
80
+ boolean replaceCueWords, Annotator cueAnnotator, Annotator scopeAnnotator) {
81
+ String posSentence = PosTaggerDriver.getTaggedSentence(grammarFile, sentence, false);
82
+ AnnotatedSentence cueTaggedSentence = cueAnnotator.annotateSentence(sentence, false);
83
+ AnnotatedSentence posCueMerged = CueAndPosFilesMerger.merge(cueTaggedSentence, posSentence, replaceCueWords);
84
+ AnnotatedSentence scopeMarkedSentence = scopeAnnotator.annotateSentence(posCueMerged.getSentenceText(), true);
85
+ AnnotatedSentence scopeWordsMarkedSentence = AnnotatedFilesMerger.merge(cueTaggedSentence, scopeMarkedSentence);
86
+ System.out.println(scopeWordsMarkedSentence.getRawText());
87
+
88
+ }
89
+ }