abstractor 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346) hide show
  1. checksums.yaml +15 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +14 -0
  4. data/Rakefile +33 -0
  5. data/app/assets/images/abstractor/add.png +0 -0
  6. data/app/assets/images/abstractor/ajax-loader.gif +0 -0
  7. data/app/assets/images/abstractor/bar.gif +0 -0
  8. data/app/assets/images/abstractor/bti_logo.jpg +0 -0
  9. data/app/assets/images/abstractor/bullet_arrow_down.png +0 -0
  10. data/app/assets/images/abstractor/bullet_arrow_up.png +0 -0
  11. data/app/assets/images/abstractor/cog.png +0 -0
  12. data/app/assets/images/abstractor/delete.png +0 -0
  13. data/app/assets/images/abstractor/edit.png +0 -0
  14. data/app/assets/images/abstractor/excel.png +0 -0
  15. data/app/assets/images/abstractor/favicon.ico +0 -0
  16. data/app/assets/images/abstractor/greencheck.gif +0 -0
  17. data/app/assets/images/abstractor/loading.gif +0 -0
  18. data/app/assets/images/abstractor/nu_logo.jpg +0 -0
  19. data/app/assets/images/abstractor/nubic_logo.png +0 -0
  20. data/app/assets/images/abstractor/page.png +0 -0
  21. data/app/assets/images/abstractor/rails.png +0 -0
  22. data/app/assets/images/abstractor/robert_h_lurie.jpg +0 -0
  23. data/app/assets/images/abstractor/show.png +0 -0
  24. data/app/assets/images/abstractor/switch_minus.gif +0 -0
  25. data/app/assets/images/abstractor/switch_plus.gif +0 -0
  26. data/app/assets/javascripts/abstractor/abstractor.js +89 -0
  27. data/app/assets/javascripts/abstractor/application.js +21 -0
  28. data/app/assets/javascripts/abstractor/combobox.js +301 -0
  29. data/app/assets/javascripts/abstractor/jquery/jquery.form.js +1074 -0
  30. data/app/assets/javascripts/abstractor/nested_attributes.js +69 -0
  31. data/app/assets/stylesheets/abstractor/abstractor_abstractions.css +312 -0
  32. data/app/assets/stylesheets/abstractor/application.css +21 -0
  33. data/app/assets/stylesheets/abstractor/jquery.ui.override.css.scss +73 -0
  34. data/app/controllers/abstractor/abstractor_abstraction_groups_controller.rb +5 -0
  35. data/app/controllers/abstractor/abstractor_abstractions_controller.rb +5 -0
  36. data/app/controllers/abstractor/abstractor_suggestions_controller.rb +5 -0
  37. data/app/controllers/abstractor/application_controller.rb +2 -0
  38. data/app/helpers/abstractor/application_helper.rb +4 -0
  39. data/app/models/abstractor/abstractor_abstraction.rb +6 -0
  40. data/app/models/abstractor/abstractor_abstraction_group.rb +6 -0
  41. data/app/models/abstractor/abstractor_abstraction_group_member.rb +6 -0
  42. data/app/models/abstractor/abstractor_abstraction_schema.rb +6 -0
  43. data/app/models/abstractor/abstractor_abstraction_schema_object_value.rb +6 -0
  44. data/app/models/abstractor/abstractor_abstraction_schema_predicate_variant.rb +6 -0
  45. data/app/models/abstractor/abstractor_abstraction_schema_relation.rb +6 -0
  46. data/app/models/abstractor/abstractor_abstraction_source.rb +6 -0
  47. data/app/models/abstractor/abstractor_object_type.rb +6 -0
  48. data/app/models/abstractor/abstractor_object_value.rb +6 -0
  49. data/app/models/abstractor/abstractor_object_value_variant.rb +6 -0
  50. data/app/models/abstractor/abstractor_relation_type.rb +6 -0
  51. data/app/models/abstractor/abstractor_rule_type.rb +6 -0
  52. data/app/models/abstractor/abstractor_subject.rb +6 -0
  53. data/app/models/abstractor/abstractor_subject_group.rb +6 -0
  54. data/app/models/abstractor/abstractor_subject_group_member.rb +6 -0
  55. data/app/models/abstractor/abstractor_subject_relation.rb +6 -0
  56. data/app/models/abstractor/abstractor_suggestion.rb +6 -0
  57. data/app/models/abstractor/abstractor_suggestion_object_value.rb +6 -0
  58. data/app/models/abstractor/abstractor_suggestion_source.rb +6 -0
  59. data/app/models/abstractor/abstractor_suggestion_status.rb +6 -0
  60. data/app/views/abstractor/abstractor_abstraction_groups/_form.html.haml +10 -0
  61. data/app/views/abstractor/abstractor_abstraction_groups/edit.html.haml +1 -0
  62. data/app/views/abstractor/abstractor_abstractions/_fields.html.haml +63 -0
  63. data/app/views/abstractor/abstractor_abstractions/_list.html.haml +45 -0
  64. data/app/views/abstractor/abstractor_abstractions/edit.html.haml +53 -0
  65. data/app/views/abstractor/abstractor_abstractions/show.html.haml +1 -0
  66. data/app/views/abstractor/shared/_error_messages.html.haml +5 -0
  67. data/config/cucumber.yml +8 -0
  68. data/config/routes.rb +7 -0
  69. data/db/migrate/20131227205140_create_abstractor_object_types.rb +10 -0
  70. data/db/migrate/20131227205219_create_abstractor_object_values.rb +10 -0
  71. data/db/migrate/20131227205256_create_abstractor_object_value_variants.rb +11 -0
  72. data/db/migrate/20131227205320_create_abstractor_relation_types.rb +10 -0
  73. data/db/migrate/20131227205354_create_abstractor_rule_types.rb +11 -0
  74. data/db/migrate/20131227205432_create_abstractor_abstraction_schemas.rb +13 -0
  75. data/db/migrate/20131227205456_create_abstractor_abstraction_schema_object_values.rb +10 -0
  76. data/db/migrate/20131227205529_create_abstractor_abstraction_schema_predicate_variants.rb +11 -0
  77. data/db/migrate/20131227205610_create_abstractor_abstraction_schema_relations.rb +11 -0
  78. data/db/migrate/20131227205652_create_abstractor_subjects.rb +12 -0
  79. data/db/migrate/20131227205732_create_abstractor_subject_relations.rb +11 -0
  80. data/db/migrate/20131227205831_create_abstractor_abstraction_sources.rb +11 -0
  81. data/db/migrate/20131227210211_create_abstractor_subject_groups.rb +10 -0
  82. data/db/migrate/20131227210244_create_abstractor_subject_group_members.rb +11 -0
  83. data/db/migrate/20131227210350_create_abstractor_abstraction_groups.rb +12 -0
  84. data/db/migrate/20131227210353_create_abstractor_abstraction_group_members.rb +11 -0
  85. data/db/migrate/20131227211050_create_abstractor_suggestion_statuses.rb +10 -0
  86. data/db/migrate/20131227211303_create_abstractor_abstractions.rb +15 -0
  87. data/db/migrate/20131227213427_create_abstractor_suggestions.rb +14 -0
  88. data/db/migrate/20131228041944_create_abstractor_suggestion_object_values.rb +11 -0
  89. data/db/migrate/20131228041945_create_abstractor_suggestion_sources.rb +16 -0
  90. data/db/seeds.rb +0 -0
  91. data/lib/abstractor.rb +8 -0
  92. data/lib/abstractor/abstractable.rb +190 -0
  93. data/lib/abstractor/core_ext/string.rb +99 -0
  94. data/lib/abstractor/engine.rb +14 -0
  95. data/lib/abstractor/methods/controllers/abstractor_abstraction_groups_controller.rb +37 -0
  96. data/lib/abstractor/methods/controllers/abstractor_abstractions_controller.rb +42 -0
  97. data/lib/abstractor/methods/controllers/abstractor_suggestions_controller.rb +28 -0
  98. data/lib/abstractor/methods/models/abstractor_abstraction.rb +65 -0
  99. data/lib/abstractor/methods/models/abstractor_abstraction_group.rb +37 -0
  100. data/lib/abstractor/methods/models/abstractor_abstraction_group_member.rb +17 -0
  101. data/lib/abstractor/methods/models/abstractor_abstraction_schema.rb +27 -0
  102. data/lib/abstractor/methods/models/abstractor_abstraction_schema_object_value.rb +17 -0
  103. data/lib/abstractor/methods/models/abstractor_abstraction_schema_predicate_variant.rb +16 -0
  104. data/lib/abstractor/methods/models/abstractor_abstraction_schema_relation.rb +18 -0
  105. data/lib/abstractor/methods/models/abstractor_abstraction_source.rb +29 -0
  106. data/lib/abstractor/methods/models/abstractor_object_type.rb +16 -0
  107. data/lib/abstractor/methods/models/abstractor_object_value.rb +24 -0
  108. data/lib/abstractor/methods/models/abstractor_object_value_variant.rb +16 -0
  109. data/lib/abstractor/methods/models/abstractor_relation_type.rb +16 -0
  110. data/lib/abstractor/methods/models/abstractor_rule_type.rb +17 -0
  111. data/lib/abstractor/methods/models/abstractor_subject.rb +258 -0
  112. data/lib/abstractor/methods/models/abstractor_subject_group.rb +19 -0
  113. data/lib/abstractor/methods/models/abstractor_subject_group_member.rb +17 -0
  114. data/lib/abstractor/methods/models/abstractor_subject_relation.rb +18 -0
  115. data/lib/abstractor/methods/models/abstractor_suggestion.rb +88 -0
  116. data/lib/abstractor/methods/models/abstractor_suggestion_object_value.rb +17 -0
  117. data/lib/abstractor/methods/models/abstractor_suggestion_source.rb +17 -0
  118. data/lib/abstractor/methods/models/abstractor_suggestion_status.rb +29 -0
  119. data/lib/abstractor/methods/models/soft_delete.rb +35 -0
  120. data/lib/abstractor/negation_detection.rb +43 -0
  121. data/lib/abstractor/parser.rb +76 -0
  122. data/lib/abstractor/setup.rb +24 -0
  123. data/lib/abstractor/user_interface.rb +40 -0
  124. data/lib/abstractor/utility.rb +8 -0
  125. data/lib/abstractor/version.rb +3 -0
  126. data/lib/generators/abstractor/install/install_generator.rb +118 -0
  127. data/lib/generators/abstractor/install/templates/controllers/abstractor_abstraction_groups_controller.rb +16 -0
  128. data/lib/generators/abstractor/install/templates/controllers/abstractor_abstractions_controller.rb +24 -0
  129. data/lib/generators/abstractor/install/templates/controllers/abstractor_suggestions_controller.rb +12 -0
  130. data/lib/generators/abstractor/install/templates/dictionaries.yml +235 -0
  131. data/lib/generators/abstractor/install/templates/helpers/abstractions_helper.rb +9 -0
  132. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction.rb +9 -0
  133. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group.rb +9 -0
  134. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group_member.rb +9 -0
  135. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema.rb +9 -0
  136. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_object_value.rb +9 -0
  137. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_predicate_variant.rb +9 -0
  138. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_relation.rb +9 -0
  139. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_source.rb +9 -0
  140. data/lib/generators/abstractor/install/templates/models/abstractor_object_type.rb +9 -0
  141. data/lib/generators/abstractor/install/templates/models/abstractor_object_value.rb +9 -0
  142. data/lib/generators/abstractor/install/templates/models/abstractor_object_value_variant.rb +9 -0
  143. data/lib/generators/abstractor/install/templates/models/abstractor_relation_type.rb +9 -0
  144. data/lib/generators/abstractor/install/templates/models/abstractor_rule_type.rb +9 -0
  145. data/lib/generators/abstractor/install/templates/models/abstractor_subject.rb +9 -0
  146. data/lib/generators/abstractor/install/templates/models/abstractor_subject_group.rb +9 -0
  147. data/lib/generators/abstractor/install/templates/models/abstractor_subject_group_member.rb +9 -0
  148. data/lib/generators/abstractor/install/templates/models/abstractor_subject_relation.rb +9 -0
  149. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion.rb +9 -0
  150. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_object_value.rb +9 -0
  151. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_source.rb +9 -0
  152. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_status.rb +9 -0
  153. data/lib/lingscope/build.xml +74 -0
  154. data/lib/lingscope/build/built-jar.properties +4 -0
  155. data/lib/lingscope/build/classes/lingscope/algorithms/AbnerTokenizer.class +0 -0
  156. data/lib/lingscope/build/classes/lingscope/algorithms/AnnotationComparer.class +0 -0
  157. data/lib/lingscope/build/classes/lingscope/algorithms/Annotator.class +0 -0
  158. data/lib/lingscope/build/classes/lingscope/algorithms/BaselineAnnotator.class +0 -0
  159. data/lib/lingscope/build/classes/lingscope/algorithms/BaselineCueAnnotator.class +0 -0
  160. data/lib/lingscope/build/classes/lingscope/algorithms/BaselineScopeAnnotator.class +0 -0
  161. data/lib/lingscope/build/classes/lingscope/algorithms/CrfAnnotator.class +0 -0
  162. data/lib/lingscope/build/classes/lingscope/algorithms/NegexAnnotator.class +0 -0
  163. data/lib/lingscope/build/classes/lingscope/algorithms/NegexCueAnnotator.class +0 -0
  164. data/lib/lingscope/build/classes/lingscope/algorithms/NegexScopeAnnotator.class +0 -0
  165. data/lib/lingscope/build/classes/lingscope/algorithms/PosTagger.class +0 -0
  166. data/lib/lingscope/build/classes/lingscope/algorithms/negex/GenNegEx.class +0 -0
  167. data/lib/lingscope/build/classes/lingscope/algorithms/negex/Sorter.class +0 -0
  168. data/lib/lingscope/build/classes/lingscope/drivers/AnnotatedFilesMerger.class +0 -0
  169. data/lib/lingscope/build/classes/lingscope/drivers/AnnotationComparerDriver.class +0 -0
  170. data/lib/lingscope/build/classes/lingscope/drivers/BaselineDriver.class +0 -0
  171. data/lib/lingscope/build/classes/lingscope/drivers/CrfDriver.class +0 -0
  172. data/lib/lingscope/build/classes/lingscope/drivers/CueAndPosFilesMerger.class +0 -0
  173. data/lib/lingscope/build/classes/lingscope/drivers/ModelTrainer.class +0 -0
  174. data/lib/lingscope/build/classes/lingscope/drivers/NegexDriver.class +0 -0
  175. data/lib/lingscope/build/classes/lingscope/drivers/PosTaggerDriver.class +0 -0
  176. data/lib/lingscope/build/classes/lingscope/drivers/SentencePosTagger.class +0 -0
  177. data/lib/lingscope/build/classes/lingscope/drivers/SentenceTagger.class +0 -0
  178. data/lib/lingscope/build/classes/lingscope/io/AnnotatedSentencesIO.class +0 -0
  179. data/lib/lingscope/build/classes/lingscope/structures/AnnotatedSentence.class +0 -0
  180. data/lib/lingscope/dist/README.TXT +32 -0
  181. data/lib/lingscope/dist/javadoc/allclasses-frame.html +80 -0
  182. data/lib/lingscope/dist/javadoc/allclasses-noframe.html +80 -0
  183. data/lib/lingscope/dist/javadoc/constant-values.html +199 -0
  184. data/lib/lingscope/dist/javadoc/deprecated-list.html +147 -0
  185. data/lib/lingscope/dist/javadoc/help-doc.html +224 -0
  186. data/lib/lingscope/dist/javadoc/index-files/index-1.html +188 -0
  187. data/lib/lingscope/dist/javadoc/index-files/index-10.html +149 -0
  188. data/lib/lingscope/dist/javadoc/index-files/index-11.html +158 -0
  189. data/lib/lingscope/dist/javadoc/index-files/index-12.html +157 -0
  190. data/lib/lingscope/dist/javadoc/index-files/index-13.html +177 -0
  191. data/lib/lingscope/dist/javadoc/index-files/index-14.html +155 -0
  192. data/lib/lingscope/dist/javadoc/index-files/index-15.html +152 -0
  193. data/lib/lingscope/dist/javadoc/index-files/index-16.html +146 -0
  194. data/lib/lingscope/dist/javadoc/index-files/index-2.html +158 -0
  195. data/lib/lingscope/dist/javadoc/index-files/index-3.html +165 -0
  196. data/lib/lingscope/dist/javadoc/index-files/index-4.html +146 -0
  197. data/lib/lingscope/dist/javadoc/index-files/index-5.html +219 -0
  198. data/lib/lingscope/dist/javadoc/index-files/index-6.html +149 -0
  199. data/lib/lingscope/dist/javadoc/index-files/index-7.html +155 -0
  200. data/lib/lingscope/dist/javadoc/index-files/index-8.html +185 -0
  201. data/lib/lingscope/dist/javadoc/index-files/index-9.html +164 -0
  202. data/lib/lingscope/dist/javadoc/index.html +74 -0
  203. data/lib/lingscope/dist/javadoc/lingscope/algorithms/AbnerTokenizer.html +280 -0
  204. data/lib/lingscope/dist/javadoc/lingscope/algorithms/AnnotationComparer.html +526 -0
  205. data/lib/lingscope/dist/javadoc/lingscope/algorithms/Annotator.html +401 -0
  206. data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineAnnotator.html +375 -0
  207. data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineCueAnnotator.html +309 -0
  208. data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineScopeAnnotator.html +340 -0
  209. data/lib/lingscope/dist/javadoc/lingscope/algorithms/CrfAnnotator.html +340 -0
  210. data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexAnnotator.html +364 -0
  211. data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexCueAnnotator.html +309 -0
  212. data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexScopeAnnotator.html +309 -0
  213. data/lib/lingscope/dist/javadoc/lingscope/algorithms/PosTagger.html +268 -0
  214. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AbnerTokenizer.html +145 -0
  215. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AnnotationComparer.html +145 -0
  216. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/Annotator.html +299 -0
  217. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineAnnotator.html +189 -0
  218. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineCueAnnotator.html +145 -0
  219. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineScopeAnnotator.html +145 -0
  220. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/CrfAnnotator.html +145 -0
  221. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexAnnotator.html +189 -0
  222. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexCueAnnotator.html +145 -0
  223. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexScopeAnnotator.html +145 -0
  224. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/PosTagger.html +145 -0
  225. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/GenNegEx.html +369 -0
  226. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/Sorter.html +253 -0
  227. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/GenNegEx.html +181 -0
  228. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/Sorter.html +145 -0
  229. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-frame.html +35 -0
  230. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-summary.html +165 -0
  231. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-tree.html +154 -0
  232. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-use.html +174 -0
  233. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-frame.html +53 -0
  234. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-summary.html +198 -0
  235. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-tree.html +160 -0
  236. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-use.html +202 -0
  237. data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotatedFilesMerger.html +284 -0
  238. data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotationComparerDriver.html +258 -0
  239. data/lib/lingscope/dist/javadoc/lingscope/drivers/BaselineDriver.html +260 -0
  240. data/lib/lingscope/dist/javadoc/lingscope/drivers/CrfDriver.html +262 -0
  241. data/lib/lingscope/dist/javadoc/lingscope/drivers/CueAndPosFilesMerger.html +310 -0
  242. data/lib/lingscope/dist/javadoc/lingscope/drivers/ModelTrainer.html +281 -0
  243. data/lib/lingscope/dist/javadoc/lingscope/drivers/NegexDriver.html +262 -0
  244. data/lib/lingscope/dist/javadoc/lingscope/drivers/PosTaggerDriver.html +289 -0
  245. data/lib/lingscope/dist/javadoc/lingscope/drivers/SentencePosTagger.html +313 -0
  246. data/lib/lingscope/dist/javadoc/lingscope/drivers/SentenceTagger.html +512 -0
  247. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotatedFilesMerger.html +145 -0
  248. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotationComparerDriver.html +145 -0
  249. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/BaselineDriver.html +145 -0
  250. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CrfDriver.html +145 -0
  251. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CueAndPosFilesMerger.html +145 -0
  252. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/ModelTrainer.html +145 -0
  253. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/NegexDriver.html +145 -0
  254. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/PosTaggerDriver.html +145 -0
  255. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentencePosTagger.html +145 -0
  256. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentenceTagger.html +145 -0
  257. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-frame.html +51 -0
  258. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-summary.html +195 -0
  259. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-tree.html +154 -0
  260. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-use.html +145 -0
  261. data/lib/lingscope/dist/javadoc/lingscope/io/AnnotatedSentencesIO.html +285 -0
  262. data/lib/lingscope/dist/javadoc/lingscope/io/class-use/AnnotatedSentencesIO.html +145 -0
  263. data/lib/lingscope/dist/javadoc/lingscope/io/package-frame.html +33 -0
  264. data/lib/lingscope/dist/javadoc/lingscope/io/package-summary.html +158 -0
  265. data/lib/lingscope/dist/javadoc/lingscope/io/package-tree.html +154 -0
  266. data/lib/lingscope/dist/javadoc/lingscope/io/package-use.html +145 -0
  267. data/lib/lingscope/dist/javadoc/lingscope/structures/AnnotatedSentence.html +425 -0
  268. data/lib/lingscope/dist/javadoc/lingscope/structures/class-use/AnnotatedSentence.html +423 -0
  269. data/lib/lingscope/dist/javadoc/lingscope/structures/package-frame.html +33 -0
  270. data/lib/lingscope/dist/javadoc/lingscope/structures/package-summary.html +158 -0
  271. data/lib/lingscope/dist/javadoc/lingscope/structures/package-tree.html +154 -0
  272. data/lib/lingscope/dist/javadoc/lingscope/structures/package-use.html +209 -0
  273. data/lib/lingscope/dist/javadoc/overview-frame.html +51 -0
  274. data/lib/lingscope/dist/javadoc/overview-summary.html +168 -0
  275. data/lib/lingscope/dist/javadoc/overview-tree.html +159 -0
  276. data/lib/lingscope/dist/javadoc/package-list +5 -0
  277. data/lib/lingscope/dist/javadoc/resources/inherit.gif +0 -0
  278. data/lib/lingscope/dist/javadoc/stylesheet.css +29 -0
  279. data/lib/lingscope/dist/lib/abner.jar +0 -0
  280. data/lib/lingscope/dist/lib/commons-lang-2.4.jar +0 -0
  281. data/lib/lingscope/dist/lib/java_utils.jar +0 -0
  282. data/lib/lingscope/dist/lib/sptoolkit.jar +0 -0
  283. data/lib/lingscope/dist/lib/stanford-postagger.jar +0 -0
  284. data/lib/lingscope/dist/lingscope.jar +0 -0
  285. data/lib/lingscope/lingscope.zip +0 -0
  286. data/lib/lingscope/manifest.mf +3 -0
  287. data/lib/lingscope/nbproject/build-impl.xml +1338 -0
  288. data/lib/lingscope/nbproject/genfiles.properties +8 -0
  289. data/lib/lingscope/nbproject/private/config.properties +0 -0
  290. data/lib/lingscope/nbproject/private/private.properties +10 -0
  291. data/lib/lingscope/nbproject/private/private.xml +5 -0
  292. data/lib/lingscope/nbproject/project.properties +85 -0
  293. data/lib/lingscope/nbproject/project.xml +15 -0
  294. data/lib/lingscope/negation_models.zip +0 -0
  295. data/lib/lingscope/negation_models/baseline_cue_all_both.model +49 -0
  296. data/lib/lingscope/negation_models/baseline_cue_all_clinical.model +19 -0
  297. data/lib/lingscope/negation_models/crf_cue_all_both.model +0 -0
  298. data/lib/lingscope/negation_models/crf_cue_all_clinical.model +0 -0
  299. data/lib/lingscope/negation_models/crf_scope_cue_crf_all_both.model +0 -0
  300. data/lib/lingscope/negation_models/crf_scope_cue_crf_all_clinical.model +0 -0
  301. data/lib/lingscope/negation_models/crf_scope_cue_regex_all_both.model +0 -0
  302. data/lib/lingscope/negation_models/crf_scope_cue_regex_all_clinical.model +0 -0
  303. data/lib/lingscope/negation_models/crf_scope_words_all_both.model +0 -0
  304. data/lib/lingscope/negation_models/crf_scope_words_all_clinical.model +0 -0
  305. data/lib/lingscope/negation_models/crf_scope_words_crf_all_both.model +0 -0
  306. data/lib/lingscope/negation_models/crf_scope_words_crf_all_clinical.model +0 -0
  307. data/lib/lingscope/negation_models/crf_scope_words_regex_all_both.model +0 -0
  308. data/lib/lingscope/negation_models/crf_scope_words_regex_all_clinical.model +0 -0
  309. data/lib/lingscope/src/lingscope/algorithms/AbnerTokenizer.java +58 -0
  310. data/lib/lingscope/src/lingscope/algorithms/AnnotationComparer.java +207 -0
  311. data/lib/lingscope/src/lingscope/algorithms/Annotator.java +51 -0
  312. data/lib/lingscope/src/lingscope/algorithms/BaselineAnnotator.java +80 -0
  313. data/lib/lingscope/src/lingscope/algorithms/BaselineCueAnnotator.java +84 -0
  314. data/lib/lingscope/src/lingscope/algorithms/BaselineScopeAnnotator.java +101 -0
  315. data/lib/lingscope/src/lingscope/algorithms/CrfAnnotator.java +45 -0
  316. data/lib/lingscope/src/lingscope/algorithms/NegexAnnotator.java +52 -0
  317. data/lib/lingscope/src/lingscope/algorithms/NegexCueAnnotator.java +26 -0
  318. data/lib/lingscope/src/lingscope/algorithms/NegexScopeAnnotator.java +26 -0
  319. data/lib/lingscope/src/lingscope/algorithms/PosTagger.java +54 -0
  320. data/lib/lingscope/src/lingscope/algorithms/negex/GenNegEx.java +530 -0
  321. data/lib/lingscope/src/lingscope/algorithms/negex/Sorter.java +36 -0
  322. data/lib/lingscope/src/lingscope/drivers/AnnotatedFilesMerger.java +61 -0
  323. data/lib/lingscope/src/lingscope/drivers/AnnotationComparerDriver.java +22 -0
  324. data/lib/lingscope/src/lingscope/drivers/BaselineDriver.java +45 -0
  325. data/lib/lingscope/src/lingscope/drivers/CrfDriver.java +31 -0
  326. data/lib/lingscope/src/lingscope/drivers/CueAndPosFilesMerger.java +86 -0
  327. data/lib/lingscope/src/lingscope/drivers/ModelTrainer.java +39 -0
  328. data/lib/lingscope/src/lingscope/drivers/NegexDriver.java +32 -0
  329. data/lib/lingscope/src/lingscope/drivers/PosTaggerDriver.java +62 -0
  330. data/lib/lingscope/src/lingscope/drivers/SentencePosTagger.java +89 -0
  331. data/lib/lingscope/src/lingscope/drivers/SentenceTagger.java +158 -0
  332. data/lib/lingscope/src/lingscope/io/AnnotatedSentencesIO.java +53 -0
  333. data/lib/lingscope/src/lingscope/structures/AnnotatedSentence.java +105 -0
  334. data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.csv +1 -0
  335. data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.txt +2273 -0
  336. data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.csv +1 -0
  337. data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.txt +1342 -0
  338. data/lib/setup/data/ICD-O-3_CSV-metadata/icd-o3 readme.txt +21 -0
  339. data/lib/setup/data/custom_site_synonyms.csv +1 -0
  340. data/lib/setup/data/diagnosis_categorizations.csv +1 -0
  341. data/lib/setup/data/high_level_primary_cns_diagnosis_categorizations.yml +597 -0
  342. data/lib/setup/data/icdo3_sites_with_laterality.txt +11 -0
  343. data/lib/setup/data/site_site_categories.txt +28 -0
  344. data/lib/tasks/abstractor_tasks.rake +22 -0
  345. data/lib/tasks/cucumber.rake +65 -0
  346. metadata +754 -0
@@ -0,0 +1,26 @@
1
+ package lingscope.algorithms;
2
+
3
+ import lingscope.structures.AnnotatedSentence;
4
+
5
+ /**
6
+ *
7
+ * @author shashank
8
+ */
9
+ public class NegexScopeAnnotator extends NegexAnnotator {
10
+
11
+ public NegexScopeAnnotator(String beginTag, String interTag, String otherTag) {
12
+ super(beginTag, interTag, otherTag);
13
+ }
14
+
15
+ @Override
16
+ public AnnotatedSentence annotateSentence(String sentence, boolean isTokenized) {
17
+ if (negex == null) {
18
+ throw new RuntimeException("Annotator has not been loaded");
19
+ }
20
+ if (!isTokenized) {
21
+ sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
22
+ }
23
+ String raw = negex.getScope(sentence, rules, beginTag, interTag, otherTag);
24
+ return new AnnotatedSentence(raw);
25
+ }
26
+ }
@@ -0,0 +1,54 @@
1
+ package lingscope.algorithms;
2
+
3
+ import edu.stanford.nlp.tagger.maxent.MaxentTagger;
4
+ import java.util.ArrayList;
5
+ import java.util.List;
6
+ import java.util.logging.Level;
7
+ import java.util.logging.Logger;
8
+
9
+ /**
10
+ * Part of speech tagger
11
+ * @author shashank
12
+ */
13
+ public class PosTagger {
14
+
15
+ private MaxentTagger posTagger;
16
+
17
+ /**
18
+ * Creates an instance of POS tagger by loading the given grammar file
19
+ * @param grammarFile
20
+ */
21
+ public PosTagger(String grammarFile) {
22
+ try {
23
+ posTagger = new MaxentTagger(grammarFile);
24
+ } catch (Exception ex) {
25
+ Logger.getLogger(PosTagger.class.getName()).log(Level.SEVERE, null, ex);
26
+ }
27
+ }
28
+
29
+ /**
30
+ * Takes a sentence as input and returns list of POS tags associated with
31
+ * each word in the sentence
32
+ * @param sentence
33
+ * @param isTokenized
34
+ * @return
35
+ */
36
+ public List<String> replaceWordsWithPos(String sentence, boolean isTokenized) {
37
+ if (!isTokenized) {
38
+ sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
39
+ }
40
+ List<String> ret = new ArrayList<String>();
41
+ String tagged = "";
42
+ try {
43
+ tagged = posTagger.tagString(sentence);
44
+ } catch (Exception ex) {
45
+ Logger.getLogger(PosTagger.class.getName()).log(Level.SEVERE, null, ex);
46
+ }
47
+ for (String wordTag : tagged.split(" +")) {
48
+ String[] tags = wordTag.split("/");
49
+ String tag = tags[tags.length - 1];
50
+ ret.add(tag);
51
+ }
52
+ return ret;
53
+ }
54
+ }
@@ -0,0 +1,530 @@
1
+ package lingscope.algorithms.negex;
2
+
3
+ import java.util.regex.Matcher;
4
+ import java.util.regex.Pattern;
5
+ import java.util.*;
6
+
7
+ /***************************************************************************************
8
+ * Author: Imre Solti
9
+ * Date: 09/15/2008
10
+ * Modified: 04/15/2009
11
+ * Changed to specifications of test kit and discussions with WC and PH.
12
+ * Modified: 04/26/2009
13
+ * Fixed the deletion of last character in scope of PREN, PREP negation scopes.
14
+ *
15
+ * Wendy Chapman's NegEx algorithm in Java.
16
+ *
17
+ * Sentence boundaries serve as WINDOW for negation (suggested by Wendy Chapman)
18
+ *
19
+ ****************************************************************************************/
20
+
21
+ /*
22
+ Copyright 2008 Imre Solti
23
+
24
+ Licensed under the Apache License, Version 2.0 (the "License");
25
+
26
+ you may not use this file except in compliance with the License. You may obtain a copy of the License at
27
+
28
+ http://www.apache.org/licenses/LICENSE-2.0
29
+
30
+ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
31
+ WITHOUT
32
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and
33
+ limitations under the License.
34
+ */
35
+ public class GenNegEx {
36
+
37
+ private String previousRunScope;
38
+ private String[] sentenceWords;
39
+ private String[] tags;
40
+
41
+ public String getCue(String sentenceString, List<String> ruleStrings, String beginTag, String interTag, String otherTag) {
42
+ tagNegation(sentenceString, ruleStrings);
43
+ StringBuilder ret = new StringBuilder();
44
+ boolean inScope = false;
45
+ for (int i = 0; i < sentenceWords.length; ++i) {
46
+ String tag = tags[i];
47
+ String sentenceWord = sentenceWords[i];
48
+ ret.append(" ").append(sentenceWord).append("|");
49
+ if (tag.equalsIgnoreCase("cue")) {
50
+ if (inScope) {
51
+ ret.append(interTag);
52
+ } else {
53
+ ret.append(beginTag);
54
+ inScope = true;
55
+ }
56
+ } else {
57
+ ret.append(otherTag);
58
+ inScope = false;
59
+ }
60
+ }
61
+ return ret.substring(1);
62
+ }
63
+
64
+ public String getScope(String sentenceString, List<String> ruleStrings, String beginTag, String interTag, String otherTag) {
65
+ tagNegation(sentenceString, ruleStrings);
66
+ StringBuilder ret = new StringBuilder();
67
+ boolean inScope = false;
68
+ Pattern punct = Pattern.compile("\\p{Punct}");
69
+ for (int i = 0; i < sentenceWords.length; ++i) {
70
+ String tag = tags[i];
71
+ String sentenceWord = sentenceWords[i];
72
+ ret.append(" ").append(sentenceWord).append("|");
73
+ Matcher m = punct.matcher(sentenceWord);
74
+ if (m.matches()) {
75
+ if (i + 1 == sentenceWords.length) {
76
+ ret.append(otherTag);
77
+ continue;
78
+ } else if (tags[i + 1].equalsIgnoreCase("other")) {
79
+ ret.append(otherTag);
80
+ continue;
81
+ }
82
+ }
83
+ if (tag.equalsIgnoreCase("cue") || tag.equalsIgnoreCase("scope")) {
84
+ if (inScope) {
85
+ ret.append(interTag);
86
+ } else {
87
+ ret.append(beginTag);
88
+ inScope = true;
89
+ }
90
+ } else {
91
+ ret.append(otherTag);
92
+ inScope = false;
93
+ }
94
+ }
95
+ return ret.substring(1);
96
+ }
97
+
98
+ public void tagNegation(String sentenceString, List<String> ruleStrings) {
99
+ Sorter s = new Sorter();
100
+ sentenceWords = sentenceString.split("\\s+");
101
+ tags = new String[sentenceWords.length];
102
+ for (int i = 0; i < sentenceWords.length; ++i) {
103
+ tags[i] = "other";
104
+ }
105
+ String sToReturn = "";
106
+ String sScope = "";
107
+ List<String> sortedRules = new ArrayList<String>();
108
+
109
+ String filler = "__";
110
+ boolean negPoss = false;
111
+
112
+ // Sort the rules by length in descending order.
113
+ // Rules need to be sorted so the longest rule is always tried to match
114
+ // first.
115
+ // Some of the rules overlap so without sorting first shorter rules (some of them POSSIBLE or PSEUDO)
116
+ // would match before longer legitimate negation rules.
117
+ //
118
+
119
+ // There is efficiency issue here. It is better if rules are sorted by the
120
+ // calling program once and used without sorting in GennegEx.
121
+ sortedRules = s.sortRules(ruleStrings);
122
+
123
+ // Process the sentence and tag each matched negation
124
+ // rule with correct negation rule tag.
125
+ //
126
+ // At the same time check for the phrase that we want to decide
127
+ // the negation status for and
128
+ // tag the phrase with [PHRASE] ... [PHRASE]
129
+ // In both the negation rules and in the phrase replace white space
130
+ // with "filler" string. (This could cause problems if the sentences
131
+ // we study has "filler" on their own.)
132
+
133
+ // Sentence needs one character in the beginning and end to match.
134
+ // We remove the extra characters after processing.
135
+ String sentence = "." + sentenceString + ".";
136
+
137
+ // Tag the phrases we want to detect for negation.
138
+ // Should happen before rule detection.
139
+
140
+ Iterator<String> iRule = sortedRules.iterator();
141
+ while (iRule.hasNext()) {
142
+ String rule = iRule.next();
143
+ Pattern p = Pattern.compile("[\\t]+"); // Working.
144
+ String[] ruleTokens = p.split(rule.trim());
145
+ String[] ruleMembers = ruleTokens[0].trim().split(" ");
146
+
147
+
148
+ // Add the regular expression characters to tokens and asemble the rule again.
149
+ String rule2 = "";
150
+ for (int i = 0; i < ruleMembers.length; i++) {
151
+ if (!ruleMembers[i].equals("")) {
152
+ if (ruleMembers.length == 1) {
153
+ rule2 = ruleMembers[i];
154
+ } else {
155
+ rule2 = rule2 + ruleMembers[i].trim() + "\\s+";
156
+ }
157
+ }
158
+ }
159
+ // Remove the last s+
160
+ if (rule2.endsWith("\\s+")) {
161
+ rule2 = rule2.substring(0, rule2.lastIndexOf("\\s+"));
162
+ }
163
+
164
+ rule2 = "(?m)(?i)[[\\p{Punct}&&[^-_\\]\\[]]|\\s+](" + rule2 + ")[[\\p{Punct}&&[^_]]|\\s+]";
165
+ // rule2 = "(?m)(?i)[[\\p{Punct}&&[^\\]\\[]]|\\s+](" + rule2 + ")[[\\p{Punct}&&[^_]]|\\s+]";
166
+
167
+ Pattern p2 = Pattern.compile(rule2.trim());
168
+ Matcher m = p2.matcher(sentence);
169
+ Matcher m2 = p2.matcher(sentence);
170
+ if (m2.find()) {
171
+ /**
172
+ * Only PREN and POST rules matter
173
+ * added by Shashank
174
+ */
175
+ if (ruleTokens[1].equalsIgnoreCase("[PREN]") || ruleTokens[1].equalsIgnoreCase("[POST]")) {
176
+ for (int i = 0; i < sentenceWords.length; ++i) {
177
+ boolean perfectMatch = true;
178
+ for (int j = 0; j < ruleMembers.length; ++j) {
179
+ if (!sentenceWords[i + j].equalsIgnoreCase(ruleMembers[j])) {
180
+ perfectMatch = false;
181
+ break;
182
+ }
183
+ }
184
+ if (perfectMatch) {
185
+ for (int j = 0; j < ruleMembers.length; ++j) {
186
+ tags[i + j] = "cue";
187
+ }
188
+ }
189
+ }
190
+ }
191
+ }
192
+
193
+ while (m.find() == true) {
194
+ sentence = m.replaceAll(" " + ruleTokens[1].trim() + m.group().trim().replaceAll(" ", filler) + ruleTokens[1].trim() + " ");
195
+ }
196
+ }
197
+
198
+
199
+ // Exchange the [PHRASE] ... [PHRASE] tags for [NEGATED] ... [NEGATED]
200
+ // based of PREN, POST rules and if flag is set to true
201
+ // then based on PREP and POSP, as well.
202
+
203
+ // Because PRENEGATION [PREN} is checked first it takes precedent over
204
+ // POSTNEGATION [POST].
205
+ // Similarly POSTNEGATION [POST] takes precedent over POSSIBLE PRENEGATION [PREP]
206
+ // and [PREP] takes precedent over POSSIBLE POSTNEGATION [POSP].
207
+
208
+ String splitPattern = "\\s+|[^\\s]__";
209
+ Pattern pSpace = Pattern.compile(splitPattern);
210
+ String[] sentenceTokens = pSpace.split(sentence.trim());
211
+ StringBuilder sb = new StringBuilder();
212
+
213
+
214
+ // Check for [PREN]
215
+ for (int i = 0; i < sentenceTokens.length; i++) {
216
+ sb.append(" ").append(sentenceTokens[i].trim());
217
+ if (sentenceTokens[i].trim().startsWith("[PREN]")) {
218
+
219
+ for (int j = i + 1; j < sentenceTokens.length; j++) {
220
+ if (sentenceTokens[j].trim().startsWith("[CONJ]")
221
+ || sentenceTokens[j].trim().startsWith("[PSEU]")
222
+ || sentenceTokens[j].trim().startsWith("[POST]")
223
+ || sentenceTokens[j].trim().startsWith("[PREP]")
224
+ || sentenceTokens[j].trim().startsWith("[POSP]")) {
225
+ break;
226
+ }
227
+ if (!tags[j].equalsIgnoreCase("cue")) {
228
+ tags[j] = "scope";
229
+ }
230
+ }
231
+ }
232
+ }
233
+
234
+ sentence = sb.toString().trim();
235
+ pSpace = Pattern.compile(splitPattern);
236
+ sentenceTokens = pSpace.split(sentence);
237
+ StringBuilder sb2 = new StringBuilder();
238
+
239
+ // Check for [POST]
240
+ for (int i = sentenceTokens.length - 1; i > 0; i--) {
241
+ sb2.insert(0, sentenceTokens[i] + " ");
242
+ if (sentenceTokens[i].trim().startsWith("[POST]")) {
243
+ for (int j = i - 1; j > 0; j--) {
244
+ if (sentenceTokens[j].trim().startsWith("[CONJ]")
245
+ || sentenceTokens[j].trim().startsWith("[PSEU]")
246
+ || sentenceTokens[j].trim().startsWith("[PREN]")
247
+ || sentenceTokens[j].trim().startsWith("[PREP]")
248
+ || sentenceTokens[j].trim().startsWith("[POSP]")) {
249
+ break;
250
+ }
251
+ if (!tags[j].equalsIgnoreCase("cue")) {
252
+ tags[j] = "scope";
253
+ }
254
+ }
255
+ }
256
+ }
257
+ }
258
+
259
+ public String negCheck(String sentenceString, String phraseString, ArrayList ruleStrings,
260
+ boolean negatePossible) throws Exception {
261
+
262
+ Sorter s = new Sorter();
263
+ String sToReturn = "";
264
+ String sScope = "";
265
+ String sentencePortion = "";
266
+
267
+ String filler = "_";
268
+ boolean negPoss = negatePossible;
269
+ boolean negationScope = true;
270
+
271
+ // Sort the rules by length in descending order.
272
+ // Rules need to be sorted so the longest rule is always tried to match
273
+ // first.
274
+ // Some of the rules overlap so without sorting first shorter rules (some of them POSSIBLE or PSEUDO)
275
+ // would match before longer legitimate negation rules.
276
+ //
277
+
278
+ // There is efficiency issue here. It is better if rules are sorted by the
279
+ // calling program once and used without sorting in GennegEx.
280
+ List<String> sortedRules = s.sortRules(ruleStrings);
281
+
282
+ // Process the sentence and tag each matched negation
283
+ // rule with correct negation rule tag.
284
+ //
285
+ // At the same time check for the phrase that we want to decide
286
+ // the negation status for and
287
+ // tag the phrase with [PHRASE] ... [PHRASE]
288
+ // In both the negation rules and in the phrase replace white space
289
+ // with "filler" string. (This could cause problems if the sentences
290
+ // we study has "filler" on their own.)
291
+
292
+ // Sentence needs one character in the beginning and end to match.
293
+ // We remove the extra characters after processing.
294
+ String sentence = "." + sentenceString + ".";
295
+
296
+ // Tag the phrases we want to detect for negation.
297
+ // Should happen before rule detection.
298
+ String phrase = phraseString;
299
+ Pattern pph = Pattern.compile(phrase.trim(), Pattern.CASE_INSENSITIVE);
300
+ Matcher mph = pph.matcher(sentence);
301
+
302
+ while (mph.find() == true) {
303
+ sentence = mph.replaceAll(" [PHRASE]" + mph.group().trim().replaceAll(" ", filler) + "[PHRASE]");
304
+ }
305
+
306
+ Iterator iRule = sortedRules.iterator();
307
+ while (iRule.hasNext()) {
308
+ String rule = (String) iRule.next();
309
+ Pattern p = Pattern.compile("[\\t]+"); // Working.
310
+ String[] ruleTokens = p.split(rule.trim());
311
+ // Add the regular expression characters to tokens and asemble the rule again.
312
+ String[] ruleMembers = ruleTokens[0].trim().split(" ");
313
+ String rule2 = "";
314
+ for (int i = 0; i <= ruleMembers.length - 1; i++) {
315
+ if (!ruleMembers[i].equals("")) {
316
+ if (ruleMembers.length == 1) {
317
+ rule2 = ruleMembers[i];
318
+ } else {
319
+ rule2 = rule2 + ruleMembers[i].trim() + "\\s+";
320
+ }
321
+ }
322
+ }
323
+ // Remove the last s+
324
+ if (rule2.endsWith("\\s+")) {
325
+ rule2 = rule2.substring(0, rule2.lastIndexOf("\\s+"));
326
+ }
327
+
328
+ rule2 = "(?m)(?i)[[\\p{Punct}&&[^\\]\\[]]|\\s+](" + rule2 + ")[[\\p{Punct}&&[^_]]|\\s+]";
329
+
330
+ Pattern p2 = Pattern.compile(rule2.trim());
331
+ Matcher m = p2.matcher(sentence);
332
+
333
+ while (m.find() == true) {
334
+ sentence = m.replaceAll(" " + ruleTokens[1].trim() + m.group().trim().replaceAll(" ", filler) + ruleTokens[1].trim() + " ");
335
+ }
336
+ }
337
+
338
+
339
+ // Exchange the [PHRASE] ... [PHRASE] tags for [NEGATED] ... [NEGATED]
340
+ // based of PREN, POST rules and if flag is set to true
341
+ // then based on PREP and POSP, as well.
342
+
343
+ // Because PRENEGATION [PREN} is checked first it takes precedent over
344
+ // POSTNEGATION [POST].
345
+ // Similarly POSTNEGATION [POST] takes precedent over POSSIBLE PRENEGATION [PREP]
346
+ // and [PREP] takes precedent over POSSIBLE POSTNEGATION [POSP].
347
+
348
+ Pattern pSpace = Pattern.compile("[\\s+]");
349
+ String[] sentenceTokens = pSpace.split(sentence);
350
+ StringBuilder sb = new StringBuilder();
351
+
352
+
353
+ // Check for [PREN]
354
+ for (int i = 0; i < sentenceTokens.length; i++) {
355
+ sb.append(" " + sentenceTokens[i].trim());
356
+ if (sentenceTokens[i].trim().startsWith("[PREN]")) {
357
+
358
+ for (int j = i + 1; j < sentenceTokens.length; j++) {
359
+ if (sentenceTokens[j].trim().startsWith("[CONJ]")
360
+ || sentenceTokens[j].trim().startsWith("[PSEU]")
361
+ || sentenceTokens[j].trim().startsWith("[POST]")
362
+ || sentenceTokens[j].trim().startsWith("[PREP]")
363
+ || sentenceTokens[j].trim().startsWith("[POSP]")) {
364
+ break;
365
+ }
366
+
367
+ if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
368
+ sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[NEGATED]");
369
+ }
370
+ }
371
+ }
372
+ }
373
+
374
+ sentence = sb.toString();
375
+ pSpace = Pattern.compile("[\\s+]");
376
+ sentenceTokens = pSpace.split(sentence);
377
+ StringBuilder sb2 = new StringBuilder();
378
+
379
+ // Check for [POST]
380
+ for (int i = sentenceTokens.length - 1; i > 0; i--) {
381
+ sb2.insert(0, sentenceTokens[i] + " ");
382
+ if (sentenceTokens[i].trim().startsWith("[POST]")) {
383
+ for (int j = i - 1; j > 0; j--) {
384
+ if (sentenceTokens[j].trim().startsWith("[CONJ]")
385
+ || sentenceTokens[j].trim().startsWith("[PSEU]")
386
+ || sentenceTokens[j].trim().startsWith("[PREN]")
387
+ || sentenceTokens[j].trim().startsWith("[PREP]")
388
+ || sentenceTokens[j].trim().startsWith("[POSP]")) {
389
+ break;
390
+ }
391
+
392
+ if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
393
+ sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[NEGATED]");
394
+ }
395
+ }
396
+ }
397
+ }
398
+
399
+ sentence = sb2.toString();
400
+
401
+ // If POSSIBLE negation is detected as negation.
402
+ // negatePossible being set to "true" then check for [PREP] and [POSP].
403
+ if (negPoss == true) {
404
+ pSpace = Pattern.compile("[\\s+]");
405
+ sentenceTokens = pSpace.split(sentence);
406
+
407
+ StringBuilder sb3 = new StringBuilder();
408
+
409
+ // Check for [PREP]
410
+ for (int i = 0; i < sentenceTokens.length; i++) {
411
+ sb3.append(" " + sentenceTokens[i].trim());
412
+ if (sentenceTokens[i].trim().startsWith("[PREP]")) {
413
+
414
+ for (int j = i + 1; j < sentenceTokens.length; j++) {
415
+ if (sentenceTokens[j].trim().startsWith("[CONJ]")
416
+ || sentenceTokens[j].trim().startsWith("[PSEU]")
417
+ || sentenceTokens[j].trim().startsWith("[POST]")
418
+ || sentenceTokens[j].trim().startsWith("[PREN]")
419
+ || sentenceTokens[j].trim().startsWith("[POSP]")) {
420
+ break;
421
+ }
422
+
423
+ if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
424
+ sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[POSSIBLE]");
425
+ }
426
+ }
427
+ }
428
+ }
429
+
430
+ sentence = sb3.toString();
431
+ pSpace = Pattern.compile("[\\s+]");
432
+ sentenceTokens = pSpace.split(sentence);
433
+ StringBuilder sb4 = new StringBuilder();
434
+
435
+ // Check for [POSP]
436
+ for (int i = sentenceTokens.length - 1; i > 0; i--) {
437
+ sb4.insert(0, sentenceTokens[i] + " ");
438
+ if (sentenceTokens[i].trim().startsWith("[POSP]")) {
439
+ for (int j = i - 1; j > 0; j--) {
440
+ if (sentenceTokens[j].trim().startsWith("[CONJ]")
441
+ || sentenceTokens[j].trim().startsWith("[PSEU]")
442
+ || sentenceTokens[j].trim().startsWith("[PREN]")
443
+ || sentenceTokens[j].trim().startsWith("[PREP]")
444
+ || sentenceTokens[j].trim().startsWith("[POST]")) {
445
+ break;
446
+ }
447
+
448
+ if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
449
+ sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[POSSIBLE]");
450
+ }
451
+ }
452
+ }
453
+ }
454
+
455
+ sentence = sb4.toString();
456
+ }
457
+
458
+ // Remove the filler character we used.
459
+ sentence = sentence.replaceAll(filler, " ");
460
+
461
+ // Remove the extra periods at the beginning
462
+ // and end of the sentence.
463
+ sentence = sentence.substring(0, sentence.trim().lastIndexOf('.'));
464
+ sentence = sentence.replaceFirst(".", "");
465
+
466
+ // Get the scope of the negation for PREN and PREP
467
+ if (sentence.contains("[PREN]") || sentence.contains("[PREP]")) {
468
+ int startOffset = sentence.indexOf("[PREN]");
469
+ if (startOffset == -1) {
470
+ startOffset = sentence.indexOf("[PREP]");
471
+ }
472
+
473
+ int endOffset = sentence.indexOf("[CONJ]");
474
+ if (endOffset == -1) {
475
+ endOffset = sentence.indexOf("[PSEU]");
476
+ }
477
+ if (endOffset == -1) {
478
+ endOffset = sentence.indexOf("[POST]");
479
+ }
480
+ if (endOffset == -1) {
481
+ endOffset = sentence.indexOf("[POSP]");
482
+ }
483
+ if (endOffset == -1 || endOffset < startOffset) {
484
+ endOffset = sentence.length() - 1;
485
+ }
486
+ sScope = sentence.substring(startOffset, endOffset + 1);
487
+ }
488
+
489
+ // Get the scope of the negation for POST and POSP
490
+ if (sentence.contains("[POST]") || sentence.contains("[POSP]")) {
491
+ int endOffset = sentence.lastIndexOf("[POST]");
492
+ if (endOffset == -1) {
493
+ endOffset = sentence.lastIndexOf("[POSP]");
494
+ }
495
+
496
+ int startOffset = sentence.lastIndexOf("[CONJ]");
497
+ if (startOffset == -1) {
498
+ startOffset = sentence.lastIndexOf("[PSEU]");
499
+ }
500
+ if (startOffset == -1) {
501
+ startOffset = sentence.lastIndexOf("[PREN]");
502
+ }
503
+ if (startOffset == -1) {
504
+ startOffset = sentence.lastIndexOf("[PREP]");
505
+ }
506
+ if (startOffset == -1) {
507
+ startOffset = 0;
508
+ }
509
+ sScope = sentence.substring(startOffset, endOffset);
510
+ }
511
+
512
+ // Classify to: negated/possible/affirmed
513
+ if (sentence.contains("[NEGATED]")) {
514
+ sentence = sentence + "\t" + "negated" + "\t" + sScope;
515
+ } else if (sentence.contains("[POSSIBLE]")) {
516
+ sentence = sentence + "\t" + "possible" + "\t" + sScope;
517
+ } else {
518
+ sentence = sentence + "\t" + "affirmed" + "\t" + sScope;
519
+ }
520
+ previousRunScope = sScope;
521
+
522
+ sToReturn = sentence;
523
+
524
+ return sToReturn;
525
+ }
526
+
527
+ public String getPreviousRunScope() {
528
+ return previousRunScope;
529
+ }
530
+ }