abstractor 1.0.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (346) hide show
  1. checksums.yaml +15 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +14 -0
  4. data/Rakefile +33 -0
  5. data/app/assets/images/abstractor/add.png +0 -0
  6. data/app/assets/images/abstractor/ajax-loader.gif +0 -0
  7. data/app/assets/images/abstractor/bar.gif +0 -0
  8. data/app/assets/images/abstractor/bti_logo.jpg +0 -0
  9. data/app/assets/images/abstractor/bullet_arrow_down.png +0 -0
  10. data/app/assets/images/abstractor/bullet_arrow_up.png +0 -0
  11. data/app/assets/images/abstractor/cog.png +0 -0
  12. data/app/assets/images/abstractor/delete.png +0 -0
  13. data/app/assets/images/abstractor/edit.png +0 -0
  14. data/app/assets/images/abstractor/excel.png +0 -0
  15. data/app/assets/images/abstractor/favicon.ico +0 -0
  16. data/app/assets/images/abstractor/greencheck.gif +0 -0
  17. data/app/assets/images/abstractor/loading.gif +0 -0
  18. data/app/assets/images/abstractor/nu_logo.jpg +0 -0
  19. data/app/assets/images/abstractor/nubic_logo.png +0 -0
  20. data/app/assets/images/abstractor/page.png +0 -0
  21. data/app/assets/images/abstractor/rails.png +0 -0
  22. data/app/assets/images/abstractor/robert_h_lurie.jpg +0 -0
  23. data/app/assets/images/abstractor/show.png +0 -0
  24. data/app/assets/images/abstractor/switch_minus.gif +0 -0
  25. data/app/assets/images/abstractor/switch_plus.gif +0 -0
  26. data/app/assets/javascripts/abstractor/abstractor.js +89 -0
  27. data/app/assets/javascripts/abstractor/application.js +21 -0
  28. data/app/assets/javascripts/abstractor/combobox.js +301 -0
  29. data/app/assets/javascripts/abstractor/jquery/jquery.form.js +1074 -0
  30. data/app/assets/javascripts/abstractor/nested_attributes.js +69 -0
  31. data/app/assets/stylesheets/abstractor/abstractor_abstractions.css +312 -0
  32. data/app/assets/stylesheets/abstractor/application.css +21 -0
  33. data/app/assets/stylesheets/abstractor/jquery.ui.override.css.scss +73 -0
  34. data/app/controllers/abstractor/abstractor_abstraction_groups_controller.rb +5 -0
  35. data/app/controllers/abstractor/abstractor_abstractions_controller.rb +5 -0
  36. data/app/controllers/abstractor/abstractor_suggestions_controller.rb +5 -0
  37. data/app/controllers/abstractor/application_controller.rb +2 -0
  38. data/app/helpers/abstractor/application_helper.rb +4 -0
  39. data/app/models/abstractor/abstractor_abstraction.rb +6 -0
  40. data/app/models/abstractor/abstractor_abstraction_group.rb +6 -0
  41. data/app/models/abstractor/abstractor_abstraction_group_member.rb +6 -0
  42. data/app/models/abstractor/abstractor_abstraction_schema.rb +6 -0
  43. data/app/models/abstractor/abstractor_abstraction_schema_object_value.rb +6 -0
  44. data/app/models/abstractor/abstractor_abstraction_schema_predicate_variant.rb +6 -0
  45. data/app/models/abstractor/abstractor_abstraction_schema_relation.rb +6 -0
  46. data/app/models/abstractor/abstractor_abstraction_source.rb +6 -0
  47. data/app/models/abstractor/abstractor_object_type.rb +6 -0
  48. data/app/models/abstractor/abstractor_object_value.rb +6 -0
  49. data/app/models/abstractor/abstractor_object_value_variant.rb +6 -0
  50. data/app/models/abstractor/abstractor_relation_type.rb +6 -0
  51. data/app/models/abstractor/abstractor_rule_type.rb +6 -0
  52. data/app/models/abstractor/abstractor_subject.rb +6 -0
  53. data/app/models/abstractor/abstractor_subject_group.rb +6 -0
  54. data/app/models/abstractor/abstractor_subject_group_member.rb +6 -0
  55. data/app/models/abstractor/abstractor_subject_relation.rb +6 -0
  56. data/app/models/abstractor/abstractor_suggestion.rb +6 -0
  57. data/app/models/abstractor/abstractor_suggestion_object_value.rb +6 -0
  58. data/app/models/abstractor/abstractor_suggestion_source.rb +6 -0
  59. data/app/models/abstractor/abstractor_suggestion_status.rb +6 -0
  60. data/app/views/abstractor/abstractor_abstraction_groups/_form.html.haml +10 -0
  61. data/app/views/abstractor/abstractor_abstraction_groups/edit.html.haml +1 -0
  62. data/app/views/abstractor/abstractor_abstractions/_fields.html.haml +63 -0
  63. data/app/views/abstractor/abstractor_abstractions/_list.html.haml +45 -0
  64. data/app/views/abstractor/abstractor_abstractions/edit.html.haml +53 -0
  65. data/app/views/abstractor/abstractor_abstractions/show.html.haml +1 -0
  66. data/app/views/abstractor/shared/_error_messages.html.haml +5 -0
  67. data/config/cucumber.yml +8 -0
  68. data/config/routes.rb +7 -0
  69. data/db/migrate/20131227205140_create_abstractor_object_types.rb +10 -0
  70. data/db/migrate/20131227205219_create_abstractor_object_values.rb +10 -0
  71. data/db/migrate/20131227205256_create_abstractor_object_value_variants.rb +11 -0
  72. data/db/migrate/20131227205320_create_abstractor_relation_types.rb +10 -0
  73. data/db/migrate/20131227205354_create_abstractor_rule_types.rb +11 -0
  74. data/db/migrate/20131227205432_create_abstractor_abstraction_schemas.rb +13 -0
  75. data/db/migrate/20131227205456_create_abstractor_abstraction_schema_object_values.rb +10 -0
  76. data/db/migrate/20131227205529_create_abstractor_abstraction_schema_predicate_variants.rb +11 -0
  77. data/db/migrate/20131227205610_create_abstractor_abstraction_schema_relations.rb +11 -0
  78. data/db/migrate/20131227205652_create_abstractor_subjects.rb +12 -0
  79. data/db/migrate/20131227205732_create_abstractor_subject_relations.rb +11 -0
  80. data/db/migrate/20131227205831_create_abstractor_abstraction_sources.rb +11 -0
  81. data/db/migrate/20131227210211_create_abstractor_subject_groups.rb +10 -0
  82. data/db/migrate/20131227210244_create_abstractor_subject_group_members.rb +11 -0
  83. data/db/migrate/20131227210350_create_abstractor_abstraction_groups.rb +12 -0
  84. data/db/migrate/20131227210353_create_abstractor_abstraction_group_members.rb +11 -0
  85. data/db/migrate/20131227211050_create_abstractor_suggestion_statuses.rb +10 -0
  86. data/db/migrate/20131227211303_create_abstractor_abstractions.rb +15 -0
  87. data/db/migrate/20131227213427_create_abstractor_suggestions.rb +14 -0
  88. data/db/migrate/20131228041944_create_abstractor_suggestion_object_values.rb +11 -0
  89. data/db/migrate/20131228041945_create_abstractor_suggestion_sources.rb +16 -0
  90. data/db/seeds.rb +0 -0
  91. data/lib/abstractor.rb +8 -0
  92. data/lib/abstractor/abstractable.rb +190 -0
  93. data/lib/abstractor/core_ext/string.rb +99 -0
  94. data/lib/abstractor/engine.rb +14 -0
  95. data/lib/abstractor/methods/controllers/abstractor_abstraction_groups_controller.rb +37 -0
  96. data/lib/abstractor/methods/controllers/abstractor_abstractions_controller.rb +42 -0
  97. data/lib/abstractor/methods/controllers/abstractor_suggestions_controller.rb +28 -0
  98. data/lib/abstractor/methods/models/abstractor_abstraction.rb +65 -0
  99. data/lib/abstractor/methods/models/abstractor_abstraction_group.rb +37 -0
  100. data/lib/abstractor/methods/models/abstractor_abstraction_group_member.rb +17 -0
  101. data/lib/abstractor/methods/models/abstractor_abstraction_schema.rb +27 -0
  102. data/lib/abstractor/methods/models/abstractor_abstraction_schema_object_value.rb +17 -0
  103. data/lib/abstractor/methods/models/abstractor_abstraction_schema_predicate_variant.rb +16 -0
  104. data/lib/abstractor/methods/models/abstractor_abstraction_schema_relation.rb +18 -0
  105. data/lib/abstractor/methods/models/abstractor_abstraction_source.rb +29 -0
  106. data/lib/abstractor/methods/models/abstractor_object_type.rb +16 -0
  107. data/lib/abstractor/methods/models/abstractor_object_value.rb +24 -0
  108. data/lib/abstractor/methods/models/abstractor_object_value_variant.rb +16 -0
  109. data/lib/abstractor/methods/models/abstractor_relation_type.rb +16 -0
  110. data/lib/abstractor/methods/models/abstractor_rule_type.rb +17 -0
  111. data/lib/abstractor/methods/models/abstractor_subject.rb +258 -0
  112. data/lib/abstractor/methods/models/abstractor_subject_group.rb +19 -0
  113. data/lib/abstractor/methods/models/abstractor_subject_group_member.rb +17 -0
  114. data/lib/abstractor/methods/models/abstractor_subject_relation.rb +18 -0
  115. data/lib/abstractor/methods/models/abstractor_suggestion.rb +88 -0
  116. data/lib/abstractor/methods/models/abstractor_suggestion_object_value.rb +17 -0
  117. data/lib/abstractor/methods/models/abstractor_suggestion_source.rb +17 -0
  118. data/lib/abstractor/methods/models/abstractor_suggestion_status.rb +29 -0
  119. data/lib/abstractor/methods/models/soft_delete.rb +35 -0
  120. data/lib/abstractor/negation_detection.rb +43 -0
  121. data/lib/abstractor/parser.rb +76 -0
  122. data/lib/abstractor/setup.rb +24 -0
  123. data/lib/abstractor/user_interface.rb +40 -0
  124. data/lib/abstractor/utility.rb +8 -0
  125. data/lib/abstractor/version.rb +3 -0
  126. data/lib/generators/abstractor/install/install_generator.rb +118 -0
  127. data/lib/generators/abstractor/install/templates/controllers/abstractor_abstraction_groups_controller.rb +16 -0
  128. data/lib/generators/abstractor/install/templates/controllers/abstractor_abstractions_controller.rb +24 -0
  129. data/lib/generators/abstractor/install/templates/controllers/abstractor_suggestions_controller.rb +12 -0
  130. data/lib/generators/abstractor/install/templates/dictionaries.yml +235 -0
  131. data/lib/generators/abstractor/install/templates/helpers/abstractions_helper.rb +9 -0
  132. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction.rb +9 -0
  133. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group.rb +9 -0
  134. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group_member.rb +9 -0
  135. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema.rb +9 -0
  136. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_object_value.rb +9 -0
  137. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_predicate_variant.rb +9 -0
  138. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_relation.rb +9 -0
  139. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_source.rb +9 -0
  140. data/lib/generators/abstractor/install/templates/models/abstractor_object_type.rb +9 -0
  141. data/lib/generators/abstractor/install/templates/models/abstractor_object_value.rb +9 -0
  142. data/lib/generators/abstractor/install/templates/models/abstractor_object_value_variant.rb +9 -0
  143. data/lib/generators/abstractor/install/templates/models/abstractor_relation_type.rb +9 -0
  144. data/lib/generators/abstractor/install/templates/models/abstractor_rule_type.rb +9 -0
  145. data/lib/generators/abstractor/install/templates/models/abstractor_subject.rb +9 -0
  146. data/lib/generators/abstractor/install/templates/models/abstractor_subject_group.rb +9 -0
  147. data/lib/generators/abstractor/install/templates/models/abstractor_subject_group_member.rb +9 -0
  148. data/lib/generators/abstractor/install/templates/models/abstractor_subject_relation.rb +9 -0
  149. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion.rb +9 -0
  150. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_object_value.rb +9 -0
  151. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_source.rb +9 -0
  152. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_status.rb +9 -0
  153. data/lib/lingscope/build.xml +74 -0
  154. data/lib/lingscope/build/built-jar.properties +4 -0
  155. data/lib/lingscope/build/classes/lingscope/algorithms/AbnerTokenizer.class +0 -0
  156. data/lib/lingscope/build/classes/lingscope/algorithms/AnnotationComparer.class +0 -0
  157. data/lib/lingscope/build/classes/lingscope/algorithms/Annotator.class +0 -0
  158. data/lib/lingscope/build/classes/lingscope/algorithms/BaselineAnnotator.class +0 -0
  159. data/lib/lingscope/build/classes/lingscope/algorithms/BaselineCueAnnotator.class +0 -0
  160. data/lib/lingscope/build/classes/lingscope/algorithms/BaselineScopeAnnotator.class +0 -0
  161. data/lib/lingscope/build/classes/lingscope/algorithms/CrfAnnotator.class +0 -0
  162. data/lib/lingscope/build/classes/lingscope/algorithms/NegexAnnotator.class +0 -0
  163. data/lib/lingscope/build/classes/lingscope/algorithms/NegexCueAnnotator.class +0 -0
  164. data/lib/lingscope/build/classes/lingscope/algorithms/NegexScopeAnnotator.class +0 -0
  165. data/lib/lingscope/build/classes/lingscope/algorithms/PosTagger.class +0 -0
  166. data/lib/lingscope/build/classes/lingscope/algorithms/negex/GenNegEx.class +0 -0
  167. data/lib/lingscope/build/classes/lingscope/algorithms/negex/Sorter.class +0 -0
  168. data/lib/lingscope/build/classes/lingscope/drivers/AnnotatedFilesMerger.class +0 -0
  169. data/lib/lingscope/build/classes/lingscope/drivers/AnnotationComparerDriver.class +0 -0
  170. data/lib/lingscope/build/classes/lingscope/drivers/BaselineDriver.class +0 -0
  171. data/lib/lingscope/build/classes/lingscope/drivers/CrfDriver.class +0 -0
  172. data/lib/lingscope/build/classes/lingscope/drivers/CueAndPosFilesMerger.class +0 -0
  173. data/lib/lingscope/build/classes/lingscope/drivers/ModelTrainer.class +0 -0
  174. data/lib/lingscope/build/classes/lingscope/drivers/NegexDriver.class +0 -0
  175. data/lib/lingscope/build/classes/lingscope/drivers/PosTaggerDriver.class +0 -0
  176. data/lib/lingscope/build/classes/lingscope/drivers/SentencePosTagger.class +0 -0
  177. data/lib/lingscope/build/classes/lingscope/drivers/SentenceTagger.class +0 -0
  178. data/lib/lingscope/build/classes/lingscope/io/AnnotatedSentencesIO.class +0 -0
  179. data/lib/lingscope/build/classes/lingscope/structures/AnnotatedSentence.class +0 -0
  180. data/lib/lingscope/dist/README.TXT +32 -0
  181. data/lib/lingscope/dist/javadoc/allclasses-frame.html +80 -0
  182. data/lib/lingscope/dist/javadoc/allclasses-noframe.html +80 -0
  183. data/lib/lingscope/dist/javadoc/constant-values.html +199 -0
  184. data/lib/lingscope/dist/javadoc/deprecated-list.html +147 -0
  185. data/lib/lingscope/dist/javadoc/help-doc.html +224 -0
  186. data/lib/lingscope/dist/javadoc/index-files/index-1.html +188 -0
  187. data/lib/lingscope/dist/javadoc/index-files/index-10.html +149 -0
  188. data/lib/lingscope/dist/javadoc/index-files/index-11.html +158 -0
  189. data/lib/lingscope/dist/javadoc/index-files/index-12.html +157 -0
  190. data/lib/lingscope/dist/javadoc/index-files/index-13.html +177 -0
  191. data/lib/lingscope/dist/javadoc/index-files/index-14.html +155 -0
  192. data/lib/lingscope/dist/javadoc/index-files/index-15.html +152 -0
  193. data/lib/lingscope/dist/javadoc/index-files/index-16.html +146 -0
  194. data/lib/lingscope/dist/javadoc/index-files/index-2.html +158 -0
  195. data/lib/lingscope/dist/javadoc/index-files/index-3.html +165 -0
  196. data/lib/lingscope/dist/javadoc/index-files/index-4.html +146 -0
  197. data/lib/lingscope/dist/javadoc/index-files/index-5.html +219 -0
  198. data/lib/lingscope/dist/javadoc/index-files/index-6.html +149 -0
  199. data/lib/lingscope/dist/javadoc/index-files/index-7.html +155 -0
  200. data/lib/lingscope/dist/javadoc/index-files/index-8.html +185 -0
  201. data/lib/lingscope/dist/javadoc/index-files/index-9.html +164 -0
  202. data/lib/lingscope/dist/javadoc/index.html +74 -0
  203. data/lib/lingscope/dist/javadoc/lingscope/algorithms/AbnerTokenizer.html +280 -0
  204. data/lib/lingscope/dist/javadoc/lingscope/algorithms/AnnotationComparer.html +526 -0
  205. data/lib/lingscope/dist/javadoc/lingscope/algorithms/Annotator.html +401 -0
  206. data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineAnnotator.html +375 -0
  207. data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineCueAnnotator.html +309 -0
  208. data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineScopeAnnotator.html +340 -0
  209. data/lib/lingscope/dist/javadoc/lingscope/algorithms/CrfAnnotator.html +340 -0
  210. data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexAnnotator.html +364 -0
  211. data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexCueAnnotator.html +309 -0
  212. data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexScopeAnnotator.html +309 -0
  213. data/lib/lingscope/dist/javadoc/lingscope/algorithms/PosTagger.html +268 -0
  214. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AbnerTokenizer.html +145 -0
  215. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AnnotationComparer.html +145 -0
  216. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/Annotator.html +299 -0
  217. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineAnnotator.html +189 -0
  218. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineCueAnnotator.html +145 -0
  219. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineScopeAnnotator.html +145 -0
  220. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/CrfAnnotator.html +145 -0
  221. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexAnnotator.html +189 -0
  222. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexCueAnnotator.html +145 -0
  223. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexScopeAnnotator.html +145 -0
  224. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/PosTagger.html +145 -0
  225. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/GenNegEx.html +369 -0
  226. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/Sorter.html +253 -0
  227. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/GenNegEx.html +181 -0
  228. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/Sorter.html +145 -0
  229. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-frame.html +35 -0
  230. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-summary.html +165 -0
  231. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-tree.html +154 -0
  232. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-use.html +174 -0
  233. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-frame.html +53 -0
  234. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-summary.html +198 -0
  235. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-tree.html +160 -0
  236. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-use.html +202 -0
  237. data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotatedFilesMerger.html +284 -0
  238. data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotationComparerDriver.html +258 -0
  239. data/lib/lingscope/dist/javadoc/lingscope/drivers/BaselineDriver.html +260 -0
  240. data/lib/lingscope/dist/javadoc/lingscope/drivers/CrfDriver.html +262 -0
  241. data/lib/lingscope/dist/javadoc/lingscope/drivers/CueAndPosFilesMerger.html +310 -0
  242. data/lib/lingscope/dist/javadoc/lingscope/drivers/ModelTrainer.html +281 -0
  243. data/lib/lingscope/dist/javadoc/lingscope/drivers/NegexDriver.html +262 -0
  244. data/lib/lingscope/dist/javadoc/lingscope/drivers/PosTaggerDriver.html +289 -0
  245. data/lib/lingscope/dist/javadoc/lingscope/drivers/SentencePosTagger.html +313 -0
  246. data/lib/lingscope/dist/javadoc/lingscope/drivers/SentenceTagger.html +512 -0
  247. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotatedFilesMerger.html +145 -0
  248. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotationComparerDriver.html +145 -0
  249. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/BaselineDriver.html +145 -0
  250. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CrfDriver.html +145 -0
  251. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CueAndPosFilesMerger.html +145 -0
  252. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/ModelTrainer.html +145 -0
  253. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/NegexDriver.html +145 -0
  254. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/PosTaggerDriver.html +145 -0
  255. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentencePosTagger.html +145 -0
  256. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentenceTagger.html +145 -0
  257. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-frame.html +51 -0
  258. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-summary.html +195 -0
  259. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-tree.html +154 -0
  260. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-use.html +145 -0
  261. data/lib/lingscope/dist/javadoc/lingscope/io/AnnotatedSentencesIO.html +285 -0
  262. data/lib/lingscope/dist/javadoc/lingscope/io/class-use/AnnotatedSentencesIO.html +145 -0
  263. data/lib/lingscope/dist/javadoc/lingscope/io/package-frame.html +33 -0
  264. data/lib/lingscope/dist/javadoc/lingscope/io/package-summary.html +158 -0
  265. data/lib/lingscope/dist/javadoc/lingscope/io/package-tree.html +154 -0
  266. data/lib/lingscope/dist/javadoc/lingscope/io/package-use.html +145 -0
  267. data/lib/lingscope/dist/javadoc/lingscope/structures/AnnotatedSentence.html +425 -0
  268. data/lib/lingscope/dist/javadoc/lingscope/structures/class-use/AnnotatedSentence.html +423 -0
  269. data/lib/lingscope/dist/javadoc/lingscope/structures/package-frame.html +33 -0
  270. data/lib/lingscope/dist/javadoc/lingscope/structures/package-summary.html +158 -0
  271. data/lib/lingscope/dist/javadoc/lingscope/structures/package-tree.html +154 -0
  272. data/lib/lingscope/dist/javadoc/lingscope/structures/package-use.html +209 -0
  273. data/lib/lingscope/dist/javadoc/overview-frame.html +51 -0
  274. data/lib/lingscope/dist/javadoc/overview-summary.html +168 -0
  275. data/lib/lingscope/dist/javadoc/overview-tree.html +159 -0
  276. data/lib/lingscope/dist/javadoc/package-list +5 -0
  277. data/lib/lingscope/dist/javadoc/resources/inherit.gif +0 -0
  278. data/lib/lingscope/dist/javadoc/stylesheet.css +29 -0
  279. data/lib/lingscope/dist/lib/abner.jar +0 -0
  280. data/lib/lingscope/dist/lib/commons-lang-2.4.jar +0 -0
  281. data/lib/lingscope/dist/lib/java_utils.jar +0 -0
  282. data/lib/lingscope/dist/lib/sptoolkit.jar +0 -0
  283. data/lib/lingscope/dist/lib/stanford-postagger.jar +0 -0
  284. data/lib/lingscope/dist/lingscope.jar +0 -0
  285. data/lib/lingscope/lingscope.zip +0 -0
  286. data/lib/lingscope/manifest.mf +3 -0
  287. data/lib/lingscope/nbproject/build-impl.xml +1338 -0
  288. data/lib/lingscope/nbproject/genfiles.properties +8 -0
  289. data/lib/lingscope/nbproject/private/config.properties +0 -0
  290. data/lib/lingscope/nbproject/private/private.properties +10 -0
  291. data/lib/lingscope/nbproject/private/private.xml +5 -0
  292. data/lib/lingscope/nbproject/project.properties +85 -0
  293. data/lib/lingscope/nbproject/project.xml +15 -0
  294. data/lib/lingscope/negation_models.zip +0 -0
  295. data/lib/lingscope/negation_models/baseline_cue_all_both.model +49 -0
  296. data/lib/lingscope/negation_models/baseline_cue_all_clinical.model +19 -0
  297. data/lib/lingscope/negation_models/crf_cue_all_both.model +0 -0
  298. data/lib/lingscope/negation_models/crf_cue_all_clinical.model +0 -0
  299. data/lib/lingscope/negation_models/crf_scope_cue_crf_all_both.model +0 -0
  300. data/lib/lingscope/negation_models/crf_scope_cue_crf_all_clinical.model +0 -0
  301. data/lib/lingscope/negation_models/crf_scope_cue_regex_all_both.model +0 -0
  302. data/lib/lingscope/negation_models/crf_scope_cue_regex_all_clinical.model +0 -0
  303. data/lib/lingscope/negation_models/crf_scope_words_all_both.model +0 -0
  304. data/lib/lingscope/negation_models/crf_scope_words_all_clinical.model +0 -0
  305. data/lib/lingscope/negation_models/crf_scope_words_crf_all_both.model +0 -0
  306. data/lib/lingscope/negation_models/crf_scope_words_crf_all_clinical.model +0 -0
  307. data/lib/lingscope/negation_models/crf_scope_words_regex_all_both.model +0 -0
  308. data/lib/lingscope/negation_models/crf_scope_words_regex_all_clinical.model +0 -0
  309. data/lib/lingscope/src/lingscope/algorithms/AbnerTokenizer.java +58 -0
  310. data/lib/lingscope/src/lingscope/algorithms/AnnotationComparer.java +207 -0
  311. data/lib/lingscope/src/lingscope/algorithms/Annotator.java +51 -0
  312. data/lib/lingscope/src/lingscope/algorithms/BaselineAnnotator.java +80 -0
  313. data/lib/lingscope/src/lingscope/algorithms/BaselineCueAnnotator.java +84 -0
  314. data/lib/lingscope/src/lingscope/algorithms/BaselineScopeAnnotator.java +101 -0
  315. data/lib/lingscope/src/lingscope/algorithms/CrfAnnotator.java +45 -0
  316. data/lib/lingscope/src/lingscope/algorithms/NegexAnnotator.java +52 -0
  317. data/lib/lingscope/src/lingscope/algorithms/NegexCueAnnotator.java +26 -0
  318. data/lib/lingscope/src/lingscope/algorithms/NegexScopeAnnotator.java +26 -0
  319. data/lib/lingscope/src/lingscope/algorithms/PosTagger.java +54 -0
  320. data/lib/lingscope/src/lingscope/algorithms/negex/GenNegEx.java +530 -0
  321. data/lib/lingscope/src/lingscope/algorithms/negex/Sorter.java +36 -0
  322. data/lib/lingscope/src/lingscope/drivers/AnnotatedFilesMerger.java +61 -0
  323. data/lib/lingscope/src/lingscope/drivers/AnnotationComparerDriver.java +22 -0
  324. data/lib/lingscope/src/lingscope/drivers/BaselineDriver.java +45 -0
  325. data/lib/lingscope/src/lingscope/drivers/CrfDriver.java +31 -0
  326. data/lib/lingscope/src/lingscope/drivers/CueAndPosFilesMerger.java +86 -0
  327. data/lib/lingscope/src/lingscope/drivers/ModelTrainer.java +39 -0
  328. data/lib/lingscope/src/lingscope/drivers/NegexDriver.java +32 -0
  329. data/lib/lingscope/src/lingscope/drivers/PosTaggerDriver.java +62 -0
  330. data/lib/lingscope/src/lingscope/drivers/SentencePosTagger.java +89 -0
  331. data/lib/lingscope/src/lingscope/drivers/SentenceTagger.java +158 -0
  332. data/lib/lingscope/src/lingscope/io/AnnotatedSentencesIO.java +53 -0
  333. data/lib/lingscope/src/lingscope/structures/AnnotatedSentence.java +105 -0
  334. data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.csv +1 -0
  335. data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.txt +2273 -0
  336. data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.csv +1 -0
  337. data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.txt +1342 -0
  338. data/lib/setup/data/ICD-O-3_CSV-metadata/icd-o3 readme.txt +21 -0
  339. data/lib/setup/data/custom_site_synonyms.csv +1 -0
  340. data/lib/setup/data/diagnosis_categorizations.csv +1 -0
  341. data/lib/setup/data/high_level_primary_cns_diagnosis_categorizations.yml +597 -0
  342. data/lib/setup/data/icdo3_sites_with_laterality.txt +11 -0
  343. data/lib/setup/data/site_site_categories.txt +28 -0
  344. data/lib/tasks/abstractor_tasks.rake +22 -0
  345. data/lib/tasks/cucumber.rake +65 -0
  346. metadata +754 -0
@@ -0,0 +1,26 @@
1
+ package lingscope.algorithms;
2
+
3
+ import lingscope.structures.AnnotatedSentence;
4
+
5
+ /**
6
+ * Negex annotator for negation scope: asks {@code negex.getScope(...)} to tag a sentence and wraps the result in an {@link AnnotatedSentence}.
7
+ * @author shashank
8
+ */
9
+ public class NegexScopeAnnotator extends NegexAnnotator {
10
+
11
+ public NegexScopeAnnotator(String beginTag, String interTag, String otherTag) {
12
+ super(beginTag, interTag, otherTag); // the three tag strings are handed unchanged to NegexAnnotator
13
+ }
14
+
15
+ @Override
16
+ public AnnotatedSentence annotateSentence(String sentence, boolean isTokenized) {
17
+ if (negex == null) { // negex is not declared in this class; presumably inherited from NegexAnnotator and set when the annotator is loaded -- TODO confirm
18
+ throw new RuntimeException("Annotator has not been loaded");
19
+ }
20
+ if (!isTokenized) { // untokenized input is first passed through AbnerTokenizer.splitTermsByPunctuation
21
+ sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
22
+ }
23
+ String raw = negex.getScope(sentence, rules, beginTag, interTag, otherTag); // rules and the tag fields are also not declared here; presumably inherited -- TODO confirm
24
+ return new AnnotatedSentence(raw); // the string returned by getScope is given to AnnotatedSentence as-is
25
+ }
26
+ }
@@ -0,0 +1,54 @@
1
+ package lingscope.algorithms;
2
+
3
+ import edu.stanford.nlp.tagger.maxent.MaxentTagger;
4
+ import java.util.ArrayList;
5
+ import java.util.List;
6
+ import java.util.logging.Level;
7
+ import java.util.logging.Logger;
8
+
9
+ /**
10
+ * Part of speech tagger backed by the Stanford {@code MaxentTagger}
11
+ * @author shashank
12
+ */
13
+ public class PosTagger {
14
+
15
+ private MaxentTagger posTagger;
16
+
17
+ /**
18
+ * Creates an instance of POS tagger by loading the given grammar file
19
+ * @param grammarFile value passed to the {@code MaxentTagger} constructor; a failure there is logged at SEVERE and not rethrown
20
+ */
21
+ public PosTagger(String grammarFile) {
22
+ try {
23
+ posTagger = new MaxentTagger(grammarFile);
24
+ } catch (Exception ex) { // NOTE(review): the exception is only logged, so posTagger is left unassigned (null) when loading fails
25
+ Logger.getLogger(PosTagger.class.getName()).log(Level.SEVERE, null, ex);
26
+ }
27
+ }
28
+
29
+ /**
30
+ * Takes a sentence as input and returns list of POS tags associated with
31
+ * each word in the sentence
32
+ * @param sentence the sentence to tag
33
+ * @param isTokenized if false, the sentence is first run through {@code AbnerTokenizer.splitTermsByPunctuation}
34
+ * @return one entry per space-separated token of the tagger output (the text after that token's last '/'); a list holding a single empty string when tagging throws
35
+ */
36
+ public List<String> replaceWordsWithPos(String sentence, boolean isTokenized) {
37
+ if (!isTokenized) {
38
+ sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
39
+ }
40
+ List<String> ret = new ArrayList<String>();
41
+ String tagged = ""; // stays empty if tagString throws below
42
+ try {
43
+ tagged = posTagger.tagString(sentence); // presumably yields space-separated word/TAG tokens -- TODO confirm against the bundled tagger version
44
+ } catch (Exception ex) { // any failure (including a null posTagger) is logged and swallowed
45
+ Logger.getLogger(PosTagger.class.getName()).log(Level.SEVERE, null, ex);
46
+ }
47
+ for (String wordTag : tagged.split(" +")) { // "".split(" +") yields one empty token, so a failed tagging still adds one "" entry
48
+ String[] tags = wordTag.split("/");
49
+ String tag = tags[tags.length - 1]; // keep only the last '/'-separated piece, so words that contain '/' still give their tag
50
+ ret.add(tag);
51
+ }
52
+ return ret;
53
+ }
54
+ }
@@ -0,0 +1,530 @@
1
+ package lingscope.algorithms.negex;
2
+
3
+ import java.util.regex.Matcher;
4
+ import java.util.regex.Pattern;
5
+ import java.util.*;
6
+
7
+ /***************************************************************************************
8
+ * Author: Imre Solti
9
+ * Date: 09/15/2008
10
+ * Modified: 04/15/2009
11
+ * Changed to specifications of test kit and discussions with WC and PH.
12
+ * Modified: 04/26/2009
13
+ * Fixed the deletion of last character in scope of PREN, PREP negation scopes.
14
+ *
15
+ * Wendy Chapman's NegEx algorithm in Java.
16
+ *
17
+ * Sentence boundaries serve as WINDOW for negation (suggested by Wendy Chapman)
18
+ *
19
+ ****************************************************************************************/
20
+
21
+ /*
22
+ Copyright 2008 Imre Solti
23
+
24
+ Licensed under the Apache License, Version 2.0 (the "License");
25
+
26
+ you may not use this file except in compliance with the License. You may obtain a copy of the License at
27
+
28
+ http://www.apache.org/licenses/LICENSE-2.0
29
+
30
+ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
31
+ WITHOUT
32
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and
33
+ limitations under the License.
34
+ */
35
+ public class GenNegEx {
36
+
37
+ private String previousRunScope;
38
+ private String[] sentenceWords;
39
+ private String[] tags;
40
+
41
+ public String getCue(String sentenceString, List<String> ruleStrings, String beginTag, String interTag, String otherTag) {
42
+ tagNegation(sentenceString, ruleStrings);
43
+ StringBuilder ret = new StringBuilder();
44
+ boolean inScope = false;
45
+ for (int i = 0; i < sentenceWords.length; ++i) {
46
+ String tag = tags[i];
47
+ String sentenceWord = sentenceWords[i];
48
+ ret.append(" ").append(sentenceWord).append("|");
49
+ if (tag.equalsIgnoreCase("cue")) {
50
+ if (inScope) {
51
+ ret.append(interTag);
52
+ } else {
53
+ ret.append(beginTag);
54
+ inScope = true;
55
+ }
56
+ } else {
57
+ ret.append(otherTag);
58
+ inScope = false;
59
+ }
60
+ }
61
+ return ret.substring(1);
62
+ }
63
+
64
+ public String getScope(String sentenceString, List<String> ruleStrings, String beginTag, String interTag, String otherTag) {
65
+ tagNegation(sentenceString, ruleStrings);
66
+ StringBuilder ret = new StringBuilder();
67
+ boolean inScope = false;
68
+ Pattern punct = Pattern.compile("\\p{Punct}");
69
+ for (int i = 0; i < sentenceWords.length; ++i) {
70
+ String tag = tags[i];
71
+ String sentenceWord = sentenceWords[i];
72
+ ret.append(" ").append(sentenceWord).append("|");
73
+ Matcher m = punct.matcher(sentenceWord);
74
+ if (m.matches()) {
75
+ if (i + 1 == sentenceWords.length) {
76
+ ret.append(otherTag);
77
+ continue;
78
+ } else if (tags[i + 1].equalsIgnoreCase("other")) {
79
+ ret.append(otherTag);
80
+ continue;
81
+ }
82
+ }
83
+ if (tag.equalsIgnoreCase("cue") || tag.equalsIgnoreCase("scope")) {
84
+ if (inScope) {
85
+ ret.append(interTag);
86
+ } else {
87
+ ret.append(beginTag);
88
+ inScope = true;
89
+ }
90
+ } else {
91
+ ret.append(otherTag);
92
+ inScope = false;
93
+ }
94
+ }
95
+ return ret.substring(1);
96
+ }
97
+
98
+ public void tagNegation(String sentenceString, List<String> ruleStrings) {
99
+ Sorter s = new Sorter();
100
+ sentenceWords = sentenceString.split("\\s+");
101
+ tags = new String[sentenceWords.length];
102
+ for (int i = 0; i < sentenceWords.length; ++i) {
103
+ tags[i] = "other";
104
+ }
105
+ String sToReturn = "";
106
+ String sScope = "";
107
+ List<String> sortedRules = new ArrayList<String>();
108
+
109
+ String filler = "__";
110
+ boolean negPoss = false;
111
+
112
+ // Sort the rules by length in descending order.
113
+ // Rules need to be sorted so the longest rule is always tried to match
114
+ // first.
115
+ // Some of the rules overlap so without sorting first shorter rules (some of them POSSIBLE or PSEUDO)
116
+ // would match before longer legitimate negation rules.
117
+ //
118
+
119
+ // There is efficiency issue here. It is better if rules are sorted by the
120
+ // calling program once and used without sorting in GennegEx.
121
+ sortedRules = s.sortRules(ruleStrings);
122
+
123
+ // Process the sentence and tag each matched negation
124
+ // rule with correct negation rule tag.
125
+ //
126
+ // At the same time check for the phrase that we want to decide
127
+ // the negation status for and
128
+ // tag the phrase with [PHRASE] ... [PHRASE]
129
+ // In both the negation rules and in the phrase replace white space
130
+ // with "filler" string. (This could cause problems if the sentences
131
+ // we study has "filler" on their own.)
132
+
133
+ // Sentence needs one character in the beginning and end to match.
134
+ // We remove the extra characters after processing.
135
+ String sentence = "." + sentenceString + ".";
136
+
137
+ // Tag the phrases we want to detect for negation.
138
+ // Should happen before rule detection.
139
+
140
+ Iterator<String> iRule = sortedRules.iterator();
141
+ while (iRule.hasNext()) {
142
+ String rule = iRule.next();
143
+ Pattern p = Pattern.compile("[\\t]+"); // Working.
144
+ String[] ruleTokens = p.split(rule.trim());
145
+ String[] ruleMembers = ruleTokens[0].trim().split(" ");
146
+
147
+
148
+ // Add the regular expression characters to tokens and asemble the rule again.
149
+ String rule2 = "";
150
+ for (int i = 0; i < ruleMembers.length; i++) {
151
+ if (!ruleMembers[i].equals("")) {
152
+ if (ruleMembers.length == 1) {
153
+ rule2 = ruleMembers[i];
154
+ } else {
155
+ rule2 = rule2 + ruleMembers[i].trim() + "\\s+";
156
+ }
157
+ }
158
+ }
159
+ // Remove the last s+
160
+ if (rule2.endsWith("\\s+")) {
161
+ rule2 = rule2.substring(0, rule2.lastIndexOf("\\s+"));
162
+ }
163
+
164
+ rule2 = "(?m)(?i)[[\\p{Punct}&&[^-_\\]\\[]]|\\s+](" + rule2 + ")[[\\p{Punct}&&[^_]]|\\s+]";
165
+ // rule2 = "(?m)(?i)[[\\p{Punct}&&[^\\]\\[]]|\\s+](" + rule2 + ")[[\\p{Punct}&&[^_]]|\\s+]";
166
+
167
+ Pattern p2 = Pattern.compile(rule2.trim());
168
+ Matcher m = p2.matcher(sentence);
169
+ Matcher m2 = p2.matcher(sentence);
170
+ if (m2.find()) {
171
+ /**
172
+ * Only PREN and POST rules matter
173
+ * added by Shashank
174
+ */
175
+ if (ruleTokens[1].equalsIgnoreCase("[PREN]") || ruleTokens[1].equalsIgnoreCase("[POST]")) {
176
+ for (int i = 0; i < sentenceWords.length; ++i) {
177
+ boolean perfectMatch = true;
178
+ for (int j = 0; j < ruleMembers.length; ++j) {
179
+ if (!sentenceWords[i + j].equalsIgnoreCase(ruleMembers[j])) {
180
+ perfectMatch = false;
181
+ break;
182
+ }
183
+ }
184
+ if (perfectMatch) {
185
+ for (int j = 0; j < ruleMembers.length; ++j) {
186
+ tags[i + j] = "cue";
187
+ }
188
+ }
189
+ }
190
+ }
191
+ }
192
+
193
+ while (m.find() == true) {
194
+ sentence = m.replaceAll(" " + ruleTokens[1].trim() + m.group().trim().replaceAll(" ", filler) + ruleTokens[1].trim() + " ");
195
+ }
196
+ }
197
+
198
+
199
+ // Exchange the [PHRASE] ... [PHRASE] tags for [NEGATED] ... [NEGATED]
200
+ // based of PREN, POST rules and if flag is set to true
201
+ // then based on PREP and POSP, as well.
202
+
203
+ // Because PRENEGATION [PREN} is checked first it takes precedent over
204
+ // POSTNEGATION [POST].
205
+ // Similarly POSTNEGATION [POST] takes precedent over POSSIBLE PRENEGATION [PREP]
206
+ // and [PREP] takes precedent over POSSIBLE POSTNEGATION [POSP].
207
+
208
+ String splitPattern = "\\s+|[^\\s]__";
209
+ Pattern pSpace = Pattern.compile(splitPattern);
210
+ String[] sentenceTokens = pSpace.split(sentence.trim());
211
+ StringBuilder sb = new StringBuilder();
212
+
213
+
214
+ // Check for [PREN]
215
+ for (int i = 0; i < sentenceTokens.length; i++) {
216
+ sb.append(" ").append(sentenceTokens[i].trim());
217
+ if (sentenceTokens[i].trim().startsWith("[PREN]")) {
218
+
219
+ for (int j = i + 1; j < sentenceTokens.length; j++) {
220
+ if (sentenceTokens[j].trim().startsWith("[CONJ]")
221
+ || sentenceTokens[j].trim().startsWith("[PSEU]")
222
+ || sentenceTokens[j].trim().startsWith("[POST]")
223
+ || sentenceTokens[j].trim().startsWith("[PREP]")
224
+ || sentenceTokens[j].trim().startsWith("[POSP]")) {
225
+ break;
226
+ }
227
+ if (!tags[j].equalsIgnoreCase("cue")) {
228
+ tags[j] = "scope";
229
+ }
230
+ }
231
+ }
232
+ }
233
+
234
+ sentence = sb.toString().trim();
235
+ pSpace = Pattern.compile(splitPattern);
236
+ sentenceTokens = pSpace.split(sentence);
237
+ StringBuilder sb2 = new StringBuilder();
238
+
239
+ // Check for [POST]
240
+ for (int i = sentenceTokens.length - 1; i > 0; i--) {
241
+ sb2.insert(0, sentenceTokens[i] + " ");
242
+ if (sentenceTokens[i].trim().startsWith("[POST]")) {
243
+ for (int j = i - 1; j > 0; j--) {
244
+ if (sentenceTokens[j].trim().startsWith("[CONJ]")
245
+ || sentenceTokens[j].trim().startsWith("[PSEU]")
246
+ || sentenceTokens[j].trim().startsWith("[PREN]")
247
+ || sentenceTokens[j].trim().startsWith("[PREP]")
248
+ || sentenceTokens[j].trim().startsWith("[POSP]")) {
249
+ break;
250
+ }
251
+ if (!tags[j].equalsIgnoreCase("cue")) {
252
+ tags[j] = "scope";
253
+ }
254
+ }
255
+ }
256
+ }
257
+ }
258
+
259
+ public String negCheck(String sentenceString, String phraseString, ArrayList ruleStrings,
260
+ boolean negatePossible) throws Exception {
261
+
262
+ Sorter s = new Sorter();
263
+ String sToReturn = "";
264
+ String sScope = "";
265
+ String sentencePortion = "";
266
+
267
+ String filler = "_";
268
+ boolean negPoss = negatePossible;
269
+ boolean negationScope = true;
270
+
271
+ // Sort the rules by length in descending order.
272
+ // Rules need to be sorted so the longest rule is always tried to match
273
+ // first.
274
+ // Some of the rules overlap so without sorting first shorter rules (some of them POSSIBLE or PSEUDO)
275
+ // would match before longer legitimate negation rules.
276
+ //
277
+
278
+ // There is efficiency issue here. It is better if rules are sorted by the
279
+ // calling program once and used without sorting in GennegEx.
280
+ List<String> sortedRules = s.sortRules(ruleStrings);
281
+
282
+ // Process the sentence and tag each matched negation
283
+ // rule with correct negation rule tag.
284
+ //
285
+ // At the same time check for the phrase that we want to decide
286
+ // the negation status for and
287
+ // tag the phrase with [PHRASE] ... [PHRASE]
288
+ // In both the negation rules and in the phrase replace white space
289
+ // with "filler" string. (This could cause problems if the sentences
290
+ // we study has "filler" on their own.)
291
+
292
+ // Sentence needs one character in the beginning and end to match.
293
+ // We remove the extra characters after processing.
294
+ String sentence = "." + sentenceString + ".";
295
+
296
+ // Tag the phrases we want to detect for negation.
297
+ // Should happen before rule detection.
298
+ String phrase = phraseString;
299
+ Pattern pph = Pattern.compile(phrase.trim(), Pattern.CASE_INSENSITIVE);
300
+ Matcher mph = pph.matcher(sentence);
301
+
302
+ while (mph.find() == true) {
303
+ sentence = mph.replaceAll(" [PHRASE]" + mph.group().trim().replaceAll(" ", filler) + "[PHRASE]");
304
+ }
305
+
306
+ Iterator iRule = sortedRules.iterator();
307
+ while (iRule.hasNext()) {
308
+ String rule = (String) iRule.next();
309
+ Pattern p = Pattern.compile("[\\t]+"); // Working.
310
+ String[] ruleTokens = p.split(rule.trim());
311
+ // Add the regular expression characters to tokens and asemble the rule again.
312
+ String[] ruleMembers = ruleTokens[0].trim().split(" ");
313
+ String rule2 = "";
314
+ for (int i = 0; i <= ruleMembers.length - 1; i++) {
315
+ if (!ruleMembers[i].equals("")) {
316
+ if (ruleMembers.length == 1) {
317
+ rule2 = ruleMembers[i];
318
+ } else {
319
+ rule2 = rule2 + ruleMembers[i].trim() + "\\s+";
320
+ }
321
+ }
322
+ }
323
+ // Remove the last s+
324
+ if (rule2.endsWith("\\s+")) {
325
+ rule2 = rule2.substring(0, rule2.lastIndexOf("\\s+"));
326
+ }
327
+
328
+ rule2 = "(?m)(?i)[[\\p{Punct}&&[^\\]\\[]]|\\s+](" + rule2 + ")[[\\p{Punct}&&[^_]]|\\s+]";
329
+
330
+ Pattern p2 = Pattern.compile(rule2.trim());
331
+ Matcher m = p2.matcher(sentence);
332
+
333
+ while (m.find() == true) {
334
+ sentence = m.replaceAll(" " + ruleTokens[1].trim() + m.group().trim().replaceAll(" ", filler) + ruleTokens[1].trim() + " ");
335
+ }
336
+ }
337
+
338
+
339
+ // Exchange the [PHRASE] ... [PHRASE] tags for [NEGATED] ... [NEGATED]
340
+ // based of PREN, POST rules and if flag is set to true
341
+ // then based on PREP and POSP, as well.
342
+
343
+ // Because PRENEGATION [PREN} is checked first it takes precedent over
344
+ // POSTNEGATION [POST].
345
+ // Similarly POSTNEGATION [POST] takes precedent over POSSIBLE PRENEGATION [PREP]
346
+ // and [PREP] takes precedent over POSSIBLE POSTNEGATION [POSP].
347
+
348
+ Pattern pSpace = Pattern.compile("[\\s+]");
349
+ String[] sentenceTokens = pSpace.split(sentence);
350
+ StringBuilder sb = new StringBuilder();
351
+
352
+
353
+ // Check for [PREN]
354
+ for (int i = 0; i < sentenceTokens.length; i++) {
355
+ sb.append(" " + sentenceTokens[i].trim());
356
+ if (sentenceTokens[i].trim().startsWith("[PREN]")) {
357
+
358
+ for (int j = i + 1; j < sentenceTokens.length; j++) {
359
+ if (sentenceTokens[j].trim().startsWith("[CONJ]")
360
+ || sentenceTokens[j].trim().startsWith("[PSEU]")
361
+ || sentenceTokens[j].trim().startsWith("[POST]")
362
+ || sentenceTokens[j].trim().startsWith("[PREP]")
363
+ || sentenceTokens[j].trim().startsWith("[POSP]")) {
364
+ break;
365
+ }
366
+
367
+ if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
368
+ sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[NEGATED]");
369
+ }
370
+ }
371
+ }
372
+ }
373
+
374
+ sentence = sb.toString();
375
+ pSpace = Pattern.compile("[\\s+]");
376
+ sentenceTokens = pSpace.split(sentence);
377
+ StringBuilder sb2 = new StringBuilder();
378
+
379
+ // Check for [POST]
380
+ for (int i = sentenceTokens.length - 1; i > 0; i--) {
381
+ sb2.insert(0, sentenceTokens[i] + " ");
382
+ if (sentenceTokens[i].trim().startsWith("[POST]")) {
383
+ for (int j = i - 1; j > 0; j--) {
384
+ if (sentenceTokens[j].trim().startsWith("[CONJ]")
385
+ || sentenceTokens[j].trim().startsWith("[PSEU]")
386
+ || sentenceTokens[j].trim().startsWith("[PREN]")
387
+ || sentenceTokens[j].trim().startsWith("[PREP]")
388
+ || sentenceTokens[j].trim().startsWith("[POSP]")) {
389
+ break;
390
+ }
391
+
392
+ if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
393
+ sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[NEGATED]");
394
+ }
395
+ }
396
+ }
397
+ }
398
+
399
+ sentence = sb2.toString();
400
+
401
+ // If POSSIBLE negation is detected as negation.
402
+ // negatePossible being set to "true" then check for [PREP] and [POSP].
403
+ if (negPoss == true) {
404
+ pSpace = Pattern.compile("[\\s+]");
405
+ sentenceTokens = pSpace.split(sentence);
406
+
407
+ StringBuilder sb3 = new StringBuilder();
408
+
409
+ // Check for [PREP]
410
+ for (int i = 0; i < sentenceTokens.length; i++) {
411
+ sb3.append(" " + sentenceTokens[i].trim());
412
+ if (sentenceTokens[i].trim().startsWith("[PREP]")) {
413
+
414
+ for (int j = i + 1; j < sentenceTokens.length; j++) {
415
+ if (sentenceTokens[j].trim().startsWith("[CONJ]")
416
+ || sentenceTokens[j].trim().startsWith("[PSEU]")
417
+ || sentenceTokens[j].trim().startsWith("[POST]")
418
+ || sentenceTokens[j].trim().startsWith("[PREN]")
419
+ || sentenceTokens[j].trim().startsWith("[POSP]")) {
420
+ break;
421
+ }
422
+
423
+ if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
424
+ sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[POSSIBLE]");
425
+ }
426
+ }
427
+ }
428
+ }
429
+
430
+ sentence = sb3.toString();
431
+ pSpace = Pattern.compile("[\\s+]");
432
+ sentenceTokens = pSpace.split(sentence);
433
+ StringBuilder sb4 = new StringBuilder();
434
+
435
+ // Check for [POSP]
436
+ for (int i = sentenceTokens.length - 1; i > 0; i--) {
437
+ sb4.insert(0, sentenceTokens[i] + " ");
438
+ if (sentenceTokens[i].trim().startsWith("[POSP]")) {
439
+ for (int j = i - 1; j > 0; j--) {
440
+ if (sentenceTokens[j].trim().startsWith("[CONJ]")
441
+ || sentenceTokens[j].trim().startsWith("[PSEU]")
442
+ || sentenceTokens[j].trim().startsWith("[PREN]")
443
+ || sentenceTokens[j].trim().startsWith("[PREP]")
444
+ || sentenceTokens[j].trim().startsWith("[POST]")) {
445
+ break;
446
+ }
447
+
448
+ if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
449
+ sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[POSSIBLE]");
450
+ }
451
+ }
452
+ }
453
+ }
454
+
455
+ sentence = sb4.toString();
456
+ }
457
+
458
+ // Remove the filler character we used.
459
+ sentence = sentence.replaceAll(filler, " ");
460
+
461
+ // Remove the extra periods at the beginning
462
+ // and end of the sentence.
463
+ sentence = sentence.substring(0, sentence.trim().lastIndexOf('.'));
464
+ sentence = sentence.replaceFirst(".", "");
465
+
466
+ // Get the scope of the negation for PREN and PREP
467
+ if (sentence.contains("[PREN]") || sentence.contains("[PREP]")) {
468
+ int startOffset = sentence.indexOf("[PREN]");
469
+ if (startOffset == -1) {
470
+ startOffset = sentence.indexOf("[PREP]");
471
+ }
472
+
473
+ int endOffset = sentence.indexOf("[CONJ]");
474
+ if (endOffset == -1) {
475
+ endOffset = sentence.indexOf("[PSEU]");
476
+ }
477
+ if (endOffset == -1) {
478
+ endOffset = sentence.indexOf("[POST]");
479
+ }
480
+ if (endOffset == -1) {
481
+ endOffset = sentence.indexOf("[POSP]");
482
+ }
483
+ if (endOffset == -1 || endOffset < startOffset) {
484
+ endOffset = sentence.length() - 1;
485
+ }
486
+ sScope = sentence.substring(startOffset, endOffset + 1);
487
+ }
488
+
489
+ // Get the scope of the negation for POST and POSP
490
+ if (sentence.contains("[POST]") || sentence.contains("[POSP]")) {
491
+ int endOffset = sentence.lastIndexOf("[POST]");
492
+ if (endOffset == -1) {
493
+ endOffset = sentence.lastIndexOf("[POSP]");
494
+ }
495
+
496
+ int startOffset = sentence.lastIndexOf("[CONJ]");
497
+ if (startOffset == -1) {
498
+ startOffset = sentence.lastIndexOf("[PSEU]");
499
+ }
500
+ if (startOffset == -1) {
501
+ startOffset = sentence.lastIndexOf("[PREN]");
502
+ }
503
+ if (startOffset == -1) {
504
+ startOffset = sentence.lastIndexOf("[PREP]");
505
+ }
506
+ if (startOffset == -1) {
507
+ startOffset = 0;
508
+ }
509
+ sScope = sentence.substring(startOffset, endOffset);
510
+ }
511
+
512
+ // Classify to: negated/possible/affirmed
513
+ if (sentence.contains("[NEGATED]")) {
514
+ sentence = sentence + "\t" + "negated" + "\t" + sScope;
515
+ } else if (sentence.contains("[POSSIBLE]")) {
516
+ sentence = sentence + "\t" + "possible" + "\t" + sScope;
517
+ } else {
518
+ sentence = sentence + "\t" + "affirmed" + "\t" + sScope;
519
+ }
520
+ previousRunScope = sScope;
521
+
522
+ sToReturn = sentence;
523
+
524
+ return sToReturn;
525
+ }
526
+
527
+ public String getPreviousRunScope() {
528
+ return previousRunScope;
529
+ }
530
+ }