abstractor 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346) hide show
  1. checksums.yaml +15 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +14 -0
  4. data/Rakefile +33 -0
  5. data/app/assets/images/abstractor/add.png +0 -0
  6. data/app/assets/images/abstractor/ajax-loader.gif +0 -0
  7. data/app/assets/images/abstractor/bar.gif +0 -0
  8. data/app/assets/images/abstractor/bti_logo.jpg +0 -0
  9. data/app/assets/images/abstractor/bullet_arrow_down.png +0 -0
  10. data/app/assets/images/abstractor/bullet_arrow_up.png +0 -0
  11. data/app/assets/images/abstractor/cog.png +0 -0
  12. data/app/assets/images/abstractor/delete.png +0 -0
  13. data/app/assets/images/abstractor/edit.png +0 -0
  14. data/app/assets/images/abstractor/excel.png +0 -0
  15. data/app/assets/images/abstractor/favicon.ico +0 -0
  16. data/app/assets/images/abstractor/greencheck.gif +0 -0
  17. data/app/assets/images/abstractor/loading.gif +0 -0
  18. data/app/assets/images/abstractor/nu_logo.jpg +0 -0
  19. data/app/assets/images/abstractor/nubic_logo.png +0 -0
  20. data/app/assets/images/abstractor/page.png +0 -0
  21. data/app/assets/images/abstractor/rails.png +0 -0
  22. data/app/assets/images/abstractor/robert_h_lurie.jpg +0 -0
  23. data/app/assets/images/abstractor/show.png +0 -0
  24. data/app/assets/images/abstractor/switch_minus.gif +0 -0
  25. data/app/assets/images/abstractor/switch_plus.gif +0 -0
  26. data/app/assets/javascripts/abstractor/abstractor.js +89 -0
  27. data/app/assets/javascripts/abstractor/application.js +21 -0
  28. data/app/assets/javascripts/abstractor/combobox.js +301 -0
  29. data/app/assets/javascripts/abstractor/jquery/jquery.form.js +1074 -0
  30. data/app/assets/javascripts/abstractor/nested_attributes.js +69 -0
  31. data/app/assets/stylesheets/abstractor/abstractor_abstractions.css +312 -0
  32. data/app/assets/stylesheets/abstractor/application.css +21 -0
  33. data/app/assets/stylesheets/abstractor/jquery.ui.override.css.scss +73 -0
  34. data/app/controllers/abstractor/abstractor_abstraction_groups_controller.rb +5 -0
  35. data/app/controllers/abstractor/abstractor_abstractions_controller.rb +5 -0
  36. data/app/controllers/abstractor/abstractor_suggestions_controller.rb +5 -0
  37. data/app/controllers/abstractor/application_controller.rb +2 -0
  38. data/app/helpers/abstractor/application_helper.rb +4 -0
  39. data/app/models/abstractor/abstractor_abstraction.rb +6 -0
  40. data/app/models/abstractor/abstractor_abstraction_group.rb +6 -0
  41. data/app/models/abstractor/abstractor_abstraction_group_member.rb +6 -0
  42. data/app/models/abstractor/abstractor_abstraction_schema.rb +6 -0
  43. data/app/models/abstractor/abstractor_abstraction_schema_object_value.rb +6 -0
  44. data/app/models/abstractor/abstractor_abstraction_schema_predicate_variant.rb +6 -0
  45. data/app/models/abstractor/abstractor_abstraction_schema_relation.rb +6 -0
  46. data/app/models/abstractor/abstractor_abstraction_source.rb +6 -0
  47. data/app/models/abstractor/abstractor_object_type.rb +6 -0
  48. data/app/models/abstractor/abstractor_object_value.rb +6 -0
  49. data/app/models/abstractor/abstractor_object_value_variant.rb +6 -0
  50. data/app/models/abstractor/abstractor_relation_type.rb +6 -0
  51. data/app/models/abstractor/abstractor_rule_type.rb +6 -0
  52. data/app/models/abstractor/abstractor_subject.rb +6 -0
  53. data/app/models/abstractor/abstractor_subject_group.rb +6 -0
  54. data/app/models/abstractor/abstractor_subject_group_member.rb +6 -0
  55. data/app/models/abstractor/abstractor_subject_relation.rb +6 -0
  56. data/app/models/abstractor/abstractor_suggestion.rb +6 -0
  57. data/app/models/abstractor/abstractor_suggestion_object_value.rb +6 -0
  58. data/app/models/abstractor/abstractor_suggestion_source.rb +6 -0
  59. data/app/models/abstractor/abstractor_suggestion_status.rb +6 -0
  60. data/app/views/abstractor/abstractor_abstraction_groups/_form.html.haml +10 -0
  61. data/app/views/abstractor/abstractor_abstraction_groups/edit.html.haml +1 -0
  62. data/app/views/abstractor/abstractor_abstractions/_fields.html.haml +63 -0
  63. data/app/views/abstractor/abstractor_abstractions/_list.html.haml +45 -0
  64. data/app/views/abstractor/abstractor_abstractions/edit.html.haml +53 -0
  65. data/app/views/abstractor/abstractor_abstractions/show.html.haml +1 -0
  66. data/app/views/abstractor/shared/_error_messages.html.haml +5 -0
  67. data/config/cucumber.yml +8 -0
  68. data/config/routes.rb +7 -0
  69. data/db/migrate/20131227205140_create_abstractor_object_types.rb +10 -0
  70. data/db/migrate/20131227205219_create_abstractor_object_values.rb +10 -0
  71. data/db/migrate/20131227205256_create_abstractor_object_value_variants.rb +11 -0
  72. data/db/migrate/20131227205320_create_abstractor_relation_types.rb +10 -0
  73. data/db/migrate/20131227205354_create_abstractor_rule_types.rb +11 -0
  74. data/db/migrate/20131227205432_create_abstractor_abstraction_schemas.rb +13 -0
  75. data/db/migrate/20131227205456_create_abstractor_abstraction_schema_object_values.rb +10 -0
  76. data/db/migrate/20131227205529_create_abstractor_abstraction_schema_predicate_variants.rb +11 -0
  77. data/db/migrate/20131227205610_create_abstractor_abstraction_schema_relations.rb +11 -0
  78. data/db/migrate/20131227205652_create_abstractor_subjects.rb +12 -0
  79. data/db/migrate/20131227205732_create_abstractor_subject_relations.rb +11 -0
  80. data/db/migrate/20131227205831_create_abstractor_abstraction_sources.rb +11 -0
  81. data/db/migrate/20131227210211_create_abstractor_subject_groups.rb +10 -0
  82. data/db/migrate/20131227210244_create_abstractor_subject_group_members.rb +11 -0
  83. data/db/migrate/20131227210350_create_abstractor_abstraction_groups.rb +12 -0
  84. data/db/migrate/20131227210353_create_abstractor_abstraction_group_members.rb +11 -0
  85. data/db/migrate/20131227211050_create_abstractor_suggestion_statuses.rb +10 -0
  86. data/db/migrate/20131227211303_create_abstractor_abstractions.rb +15 -0
  87. data/db/migrate/20131227213427_create_abstractor_suggestions.rb +14 -0
  88. data/db/migrate/20131228041944_create_abstractor_suggestion_object_values.rb +11 -0
  89. data/db/migrate/20131228041945_create_abstractor_suggestion_sources.rb +16 -0
  90. data/db/seeds.rb +0 -0
  91. data/lib/abstractor.rb +8 -0
  92. data/lib/abstractor/abstractable.rb +190 -0
  93. data/lib/abstractor/core_ext/string.rb +99 -0
  94. data/lib/abstractor/engine.rb +14 -0
  95. data/lib/abstractor/methods/controllers/abstractor_abstraction_groups_controller.rb +37 -0
  96. data/lib/abstractor/methods/controllers/abstractor_abstractions_controller.rb +42 -0
  97. data/lib/abstractor/methods/controllers/abstractor_suggestions_controller.rb +28 -0
  98. data/lib/abstractor/methods/models/abstractor_abstraction.rb +65 -0
  99. data/lib/abstractor/methods/models/abstractor_abstraction_group.rb +37 -0
  100. data/lib/abstractor/methods/models/abstractor_abstraction_group_member.rb +17 -0
  101. data/lib/abstractor/methods/models/abstractor_abstraction_schema.rb +27 -0
  102. data/lib/abstractor/methods/models/abstractor_abstraction_schema_object_value.rb +17 -0
  103. data/lib/abstractor/methods/models/abstractor_abstraction_schema_predicate_variant.rb +16 -0
  104. data/lib/abstractor/methods/models/abstractor_abstraction_schema_relation.rb +18 -0
  105. data/lib/abstractor/methods/models/abstractor_abstraction_source.rb +29 -0
  106. data/lib/abstractor/methods/models/abstractor_object_type.rb +16 -0
  107. data/lib/abstractor/methods/models/abstractor_object_value.rb +24 -0
  108. data/lib/abstractor/methods/models/abstractor_object_value_variant.rb +16 -0
  109. data/lib/abstractor/methods/models/abstractor_relation_type.rb +16 -0
  110. data/lib/abstractor/methods/models/abstractor_rule_type.rb +17 -0
  111. data/lib/abstractor/methods/models/abstractor_subject.rb +258 -0
  112. data/lib/abstractor/methods/models/abstractor_subject_group.rb +19 -0
  113. data/lib/abstractor/methods/models/abstractor_subject_group_member.rb +17 -0
  114. data/lib/abstractor/methods/models/abstractor_subject_relation.rb +18 -0
  115. data/lib/abstractor/methods/models/abstractor_suggestion.rb +88 -0
  116. data/lib/abstractor/methods/models/abstractor_suggestion_object_value.rb +17 -0
  117. data/lib/abstractor/methods/models/abstractor_suggestion_source.rb +17 -0
  118. data/lib/abstractor/methods/models/abstractor_suggestion_status.rb +29 -0
  119. data/lib/abstractor/methods/models/soft_delete.rb +35 -0
  120. data/lib/abstractor/negation_detection.rb +43 -0
  121. data/lib/abstractor/parser.rb +76 -0
  122. data/lib/abstractor/setup.rb +24 -0
  123. data/lib/abstractor/user_interface.rb +40 -0
  124. data/lib/abstractor/utility.rb +8 -0
  125. data/lib/abstractor/version.rb +3 -0
  126. data/lib/generators/abstractor/install/install_generator.rb +118 -0
  127. data/lib/generators/abstractor/install/templates/controllers/abstractor_abstraction_groups_controller.rb +16 -0
  128. data/lib/generators/abstractor/install/templates/controllers/abstractor_abstractions_controller.rb +24 -0
  129. data/lib/generators/abstractor/install/templates/controllers/abstractor_suggestions_controller.rb +12 -0
  130. data/lib/generators/abstractor/install/templates/dictionaries.yml +235 -0
  131. data/lib/generators/abstractor/install/templates/helpers/abstractions_helper.rb +9 -0
  132. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction.rb +9 -0
  133. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group.rb +9 -0
  134. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group_member.rb +9 -0
  135. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema.rb +9 -0
  136. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_object_value.rb +9 -0
  137. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_predicate_variant.rb +9 -0
  138. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_relation.rb +9 -0
  139. data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_source.rb +9 -0
  140. data/lib/generators/abstractor/install/templates/models/abstractor_object_type.rb +9 -0
  141. data/lib/generators/abstractor/install/templates/models/abstractor_object_value.rb +9 -0
  142. data/lib/generators/abstractor/install/templates/models/abstractor_object_value_variant.rb +9 -0
  143. data/lib/generators/abstractor/install/templates/models/abstractor_relation_type.rb +9 -0
  144. data/lib/generators/abstractor/install/templates/models/abstractor_rule_type.rb +9 -0
  145. data/lib/generators/abstractor/install/templates/models/abstractor_subject.rb +9 -0
  146. data/lib/generators/abstractor/install/templates/models/abstractor_subject_group.rb +9 -0
  147. data/lib/generators/abstractor/install/templates/models/abstractor_subject_group_member.rb +9 -0
  148. data/lib/generators/abstractor/install/templates/models/abstractor_subject_relation.rb +9 -0
  149. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion.rb +9 -0
  150. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_object_value.rb +9 -0
  151. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_source.rb +9 -0
  152. data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_status.rb +9 -0
  153. data/lib/lingscope/build.xml +74 -0
  154. data/lib/lingscope/build/built-jar.properties +4 -0
  155. data/lib/lingscope/build/classes/lingscope/algorithms/AbnerTokenizer.class +0 -0
  156. data/lib/lingscope/build/classes/lingscope/algorithms/AnnotationComparer.class +0 -0
  157. data/lib/lingscope/build/classes/lingscope/algorithms/Annotator.class +0 -0
  158. data/lib/lingscope/build/classes/lingscope/algorithms/BaselineAnnotator.class +0 -0
  159. data/lib/lingscope/build/classes/lingscope/algorithms/BaselineCueAnnotator.class +0 -0
  160. data/lib/lingscope/build/classes/lingscope/algorithms/BaselineScopeAnnotator.class +0 -0
  161. data/lib/lingscope/build/classes/lingscope/algorithms/CrfAnnotator.class +0 -0
  162. data/lib/lingscope/build/classes/lingscope/algorithms/NegexAnnotator.class +0 -0
  163. data/lib/lingscope/build/classes/lingscope/algorithms/NegexCueAnnotator.class +0 -0
  164. data/lib/lingscope/build/classes/lingscope/algorithms/NegexScopeAnnotator.class +0 -0
  165. data/lib/lingscope/build/classes/lingscope/algorithms/PosTagger.class +0 -0
  166. data/lib/lingscope/build/classes/lingscope/algorithms/negex/GenNegEx.class +0 -0
  167. data/lib/lingscope/build/classes/lingscope/algorithms/negex/Sorter.class +0 -0
  168. data/lib/lingscope/build/classes/lingscope/drivers/AnnotatedFilesMerger.class +0 -0
  169. data/lib/lingscope/build/classes/lingscope/drivers/AnnotationComparerDriver.class +0 -0
  170. data/lib/lingscope/build/classes/lingscope/drivers/BaselineDriver.class +0 -0
  171. data/lib/lingscope/build/classes/lingscope/drivers/CrfDriver.class +0 -0
  172. data/lib/lingscope/build/classes/lingscope/drivers/CueAndPosFilesMerger.class +0 -0
  173. data/lib/lingscope/build/classes/lingscope/drivers/ModelTrainer.class +0 -0
  174. data/lib/lingscope/build/classes/lingscope/drivers/NegexDriver.class +0 -0
  175. data/lib/lingscope/build/classes/lingscope/drivers/PosTaggerDriver.class +0 -0
  176. data/lib/lingscope/build/classes/lingscope/drivers/SentencePosTagger.class +0 -0
  177. data/lib/lingscope/build/classes/lingscope/drivers/SentenceTagger.class +0 -0
  178. data/lib/lingscope/build/classes/lingscope/io/AnnotatedSentencesIO.class +0 -0
  179. data/lib/lingscope/build/classes/lingscope/structures/AnnotatedSentence.class +0 -0
  180. data/lib/lingscope/dist/README.TXT +32 -0
  181. data/lib/lingscope/dist/javadoc/allclasses-frame.html +80 -0
  182. data/lib/lingscope/dist/javadoc/allclasses-noframe.html +80 -0
  183. data/lib/lingscope/dist/javadoc/constant-values.html +199 -0
  184. data/lib/lingscope/dist/javadoc/deprecated-list.html +147 -0
  185. data/lib/lingscope/dist/javadoc/help-doc.html +224 -0
  186. data/lib/lingscope/dist/javadoc/index-files/index-1.html +188 -0
  187. data/lib/lingscope/dist/javadoc/index-files/index-10.html +149 -0
  188. data/lib/lingscope/dist/javadoc/index-files/index-11.html +158 -0
  189. data/lib/lingscope/dist/javadoc/index-files/index-12.html +157 -0
  190. data/lib/lingscope/dist/javadoc/index-files/index-13.html +177 -0
  191. data/lib/lingscope/dist/javadoc/index-files/index-14.html +155 -0
  192. data/lib/lingscope/dist/javadoc/index-files/index-15.html +152 -0
  193. data/lib/lingscope/dist/javadoc/index-files/index-16.html +146 -0
  194. data/lib/lingscope/dist/javadoc/index-files/index-2.html +158 -0
  195. data/lib/lingscope/dist/javadoc/index-files/index-3.html +165 -0
  196. data/lib/lingscope/dist/javadoc/index-files/index-4.html +146 -0
  197. data/lib/lingscope/dist/javadoc/index-files/index-5.html +219 -0
  198. data/lib/lingscope/dist/javadoc/index-files/index-6.html +149 -0
  199. data/lib/lingscope/dist/javadoc/index-files/index-7.html +155 -0
  200. data/lib/lingscope/dist/javadoc/index-files/index-8.html +185 -0
  201. data/lib/lingscope/dist/javadoc/index-files/index-9.html +164 -0
  202. data/lib/lingscope/dist/javadoc/index.html +74 -0
  203. data/lib/lingscope/dist/javadoc/lingscope/algorithms/AbnerTokenizer.html +280 -0
  204. data/lib/lingscope/dist/javadoc/lingscope/algorithms/AnnotationComparer.html +526 -0
  205. data/lib/lingscope/dist/javadoc/lingscope/algorithms/Annotator.html +401 -0
  206. data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineAnnotator.html +375 -0
  207. data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineCueAnnotator.html +309 -0
  208. data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineScopeAnnotator.html +340 -0
  209. data/lib/lingscope/dist/javadoc/lingscope/algorithms/CrfAnnotator.html +340 -0
  210. data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexAnnotator.html +364 -0
  211. data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexCueAnnotator.html +309 -0
  212. data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexScopeAnnotator.html +309 -0
  213. data/lib/lingscope/dist/javadoc/lingscope/algorithms/PosTagger.html +268 -0
  214. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AbnerTokenizer.html +145 -0
  215. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AnnotationComparer.html +145 -0
  216. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/Annotator.html +299 -0
  217. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineAnnotator.html +189 -0
  218. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineCueAnnotator.html +145 -0
  219. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineScopeAnnotator.html +145 -0
  220. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/CrfAnnotator.html +145 -0
  221. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexAnnotator.html +189 -0
  222. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexCueAnnotator.html +145 -0
  223. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexScopeAnnotator.html +145 -0
  224. data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/PosTagger.html +145 -0
  225. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/GenNegEx.html +369 -0
  226. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/Sorter.html +253 -0
  227. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/GenNegEx.html +181 -0
  228. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/Sorter.html +145 -0
  229. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-frame.html +35 -0
  230. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-summary.html +165 -0
  231. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-tree.html +154 -0
  232. data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-use.html +174 -0
  233. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-frame.html +53 -0
  234. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-summary.html +198 -0
  235. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-tree.html +160 -0
  236. data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-use.html +202 -0
  237. data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotatedFilesMerger.html +284 -0
  238. data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotationComparerDriver.html +258 -0
  239. data/lib/lingscope/dist/javadoc/lingscope/drivers/BaselineDriver.html +260 -0
  240. data/lib/lingscope/dist/javadoc/lingscope/drivers/CrfDriver.html +262 -0
  241. data/lib/lingscope/dist/javadoc/lingscope/drivers/CueAndPosFilesMerger.html +310 -0
  242. data/lib/lingscope/dist/javadoc/lingscope/drivers/ModelTrainer.html +281 -0
  243. data/lib/lingscope/dist/javadoc/lingscope/drivers/NegexDriver.html +262 -0
  244. data/lib/lingscope/dist/javadoc/lingscope/drivers/PosTaggerDriver.html +289 -0
  245. data/lib/lingscope/dist/javadoc/lingscope/drivers/SentencePosTagger.html +313 -0
  246. data/lib/lingscope/dist/javadoc/lingscope/drivers/SentenceTagger.html +512 -0
  247. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotatedFilesMerger.html +145 -0
  248. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotationComparerDriver.html +145 -0
  249. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/BaselineDriver.html +145 -0
  250. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CrfDriver.html +145 -0
  251. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CueAndPosFilesMerger.html +145 -0
  252. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/ModelTrainer.html +145 -0
  253. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/NegexDriver.html +145 -0
  254. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/PosTaggerDriver.html +145 -0
  255. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentencePosTagger.html +145 -0
  256. data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentenceTagger.html +145 -0
  257. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-frame.html +51 -0
  258. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-summary.html +195 -0
  259. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-tree.html +154 -0
  260. data/lib/lingscope/dist/javadoc/lingscope/drivers/package-use.html +145 -0
  261. data/lib/lingscope/dist/javadoc/lingscope/io/AnnotatedSentencesIO.html +285 -0
  262. data/lib/lingscope/dist/javadoc/lingscope/io/class-use/AnnotatedSentencesIO.html +145 -0
  263. data/lib/lingscope/dist/javadoc/lingscope/io/package-frame.html +33 -0
  264. data/lib/lingscope/dist/javadoc/lingscope/io/package-summary.html +158 -0
  265. data/lib/lingscope/dist/javadoc/lingscope/io/package-tree.html +154 -0
  266. data/lib/lingscope/dist/javadoc/lingscope/io/package-use.html +145 -0
  267. data/lib/lingscope/dist/javadoc/lingscope/structures/AnnotatedSentence.html +425 -0
  268. data/lib/lingscope/dist/javadoc/lingscope/structures/class-use/AnnotatedSentence.html +423 -0
  269. data/lib/lingscope/dist/javadoc/lingscope/structures/package-frame.html +33 -0
  270. data/lib/lingscope/dist/javadoc/lingscope/structures/package-summary.html +158 -0
  271. data/lib/lingscope/dist/javadoc/lingscope/structures/package-tree.html +154 -0
  272. data/lib/lingscope/dist/javadoc/lingscope/structures/package-use.html +209 -0
  273. data/lib/lingscope/dist/javadoc/overview-frame.html +51 -0
  274. data/lib/lingscope/dist/javadoc/overview-summary.html +168 -0
  275. data/lib/lingscope/dist/javadoc/overview-tree.html +159 -0
  276. data/lib/lingscope/dist/javadoc/package-list +5 -0
  277. data/lib/lingscope/dist/javadoc/resources/inherit.gif +0 -0
  278. data/lib/lingscope/dist/javadoc/stylesheet.css +29 -0
  279. data/lib/lingscope/dist/lib/abner.jar +0 -0
  280. data/lib/lingscope/dist/lib/commons-lang-2.4.jar +0 -0
  281. data/lib/lingscope/dist/lib/java_utils.jar +0 -0
  282. data/lib/lingscope/dist/lib/sptoolkit.jar +0 -0
  283. data/lib/lingscope/dist/lib/stanford-postagger.jar +0 -0
  284. data/lib/lingscope/dist/lingscope.jar +0 -0
  285. data/lib/lingscope/lingscope.zip +0 -0
  286. data/lib/lingscope/manifest.mf +3 -0
  287. data/lib/lingscope/nbproject/build-impl.xml +1338 -0
  288. data/lib/lingscope/nbproject/genfiles.properties +8 -0
  289. data/lib/lingscope/nbproject/private/config.properties +0 -0
  290. data/lib/lingscope/nbproject/private/private.properties +10 -0
  291. data/lib/lingscope/nbproject/private/private.xml +5 -0
  292. data/lib/lingscope/nbproject/project.properties +85 -0
  293. data/lib/lingscope/nbproject/project.xml +15 -0
  294. data/lib/lingscope/negation_models.zip +0 -0
  295. data/lib/lingscope/negation_models/baseline_cue_all_both.model +49 -0
  296. data/lib/lingscope/negation_models/baseline_cue_all_clinical.model +19 -0
  297. data/lib/lingscope/negation_models/crf_cue_all_both.model +0 -0
  298. data/lib/lingscope/negation_models/crf_cue_all_clinical.model +0 -0
  299. data/lib/lingscope/negation_models/crf_scope_cue_crf_all_both.model +0 -0
  300. data/lib/lingscope/negation_models/crf_scope_cue_crf_all_clinical.model +0 -0
  301. data/lib/lingscope/negation_models/crf_scope_cue_regex_all_both.model +0 -0
  302. data/lib/lingscope/negation_models/crf_scope_cue_regex_all_clinical.model +0 -0
  303. data/lib/lingscope/negation_models/crf_scope_words_all_both.model +0 -0
  304. data/lib/lingscope/negation_models/crf_scope_words_all_clinical.model +0 -0
  305. data/lib/lingscope/negation_models/crf_scope_words_crf_all_both.model +0 -0
  306. data/lib/lingscope/negation_models/crf_scope_words_crf_all_clinical.model +0 -0
  307. data/lib/lingscope/negation_models/crf_scope_words_regex_all_both.model +0 -0
  308. data/lib/lingscope/negation_models/crf_scope_words_regex_all_clinical.model +0 -0
  309. data/lib/lingscope/src/lingscope/algorithms/AbnerTokenizer.java +58 -0
  310. data/lib/lingscope/src/lingscope/algorithms/AnnotationComparer.java +207 -0
  311. data/lib/lingscope/src/lingscope/algorithms/Annotator.java +51 -0
  312. data/lib/lingscope/src/lingscope/algorithms/BaselineAnnotator.java +80 -0
  313. data/lib/lingscope/src/lingscope/algorithms/BaselineCueAnnotator.java +84 -0
  314. data/lib/lingscope/src/lingscope/algorithms/BaselineScopeAnnotator.java +101 -0
  315. data/lib/lingscope/src/lingscope/algorithms/CrfAnnotator.java +45 -0
  316. data/lib/lingscope/src/lingscope/algorithms/NegexAnnotator.java +52 -0
  317. data/lib/lingscope/src/lingscope/algorithms/NegexCueAnnotator.java +26 -0
  318. data/lib/lingscope/src/lingscope/algorithms/NegexScopeAnnotator.java +26 -0
  319. data/lib/lingscope/src/lingscope/algorithms/PosTagger.java +54 -0
  320. data/lib/lingscope/src/lingscope/algorithms/negex/GenNegEx.java +530 -0
  321. data/lib/lingscope/src/lingscope/algorithms/negex/Sorter.java +36 -0
  322. data/lib/lingscope/src/lingscope/drivers/AnnotatedFilesMerger.java +61 -0
  323. data/lib/lingscope/src/lingscope/drivers/AnnotationComparerDriver.java +22 -0
  324. data/lib/lingscope/src/lingscope/drivers/BaselineDriver.java +45 -0
  325. data/lib/lingscope/src/lingscope/drivers/CrfDriver.java +31 -0
  326. data/lib/lingscope/src/lingscope/drivers/CueAndPosFilesMerger.java +86 -0
  327. data/lib/lingscope/src/lingscope/drivers/ModelTrainer.java +39 -0
  328. data/lib/lingscope/src/lingscope/drivers/NegexDriver.java +32 -0
  329. data/lib/lingscope/src/lingscope/drivers/PosTaggerDriver.java +62 -0
  330. data/lib/lingscope/src/lingscope/drivers/SentencePosTagger.java +89 -0
  331. data/lib/lingscope/src/lingscope/drivers/SentenceTagger.java +158 -0
  332. data/lib/lingscope/src/lingscope/io/AnnotatedSentencesIO.java +53 -0
  333. data/lib/lingscope/src/lingscope/structures/AnnotatedSentence.java +105 -0
  334. data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.csv +1 -0
  335. data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.txt +2273 -0
  336. data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.csv +1 -0
  337. data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.txt +1342 -0
  338. data/lib/setup/data/ICD-O-3_CSV-metadata/icd-o3 readme.txt +21 -0
  339. data/lib/setup/data/custom_site_synonyms.csv +1 -0
  340. data/lib/setup/data/diagnosis_categorizations.csv +1 -0
  341. data/lib/setup/data/high_level_primary_cns_diagnosis_categorizations.yml +597 -0
  342. data/lib/setup/data/icdo3_sites_with_laterality.txt +11 -0
  343. data/lib/setup/data/site_site_categories.txt +28 -0
  344. data/lib/tasks/abstractor_tasks.rake +22 -0
  345. data/lib/tasks/cucumber.rake +65 -0
  346. metadata +754 -0
@@ -0,0 +1,51 @@
1
+ package lingscope.algorithms;
2
+
3
+ import java.util.List;
4
+ import lingscope.structures.AnnotatedSentence;
5
+
6
+ /**
7
+ *
8
+ * @author shashank
9
+ */
10
+ public abstract class Annotator {
11
+
12
+ protected String beginTag;
13
+ protected String interTag;
14
+ protected String otherTag;
15
+
16
+ public Annotator(String beginTag, String interTag, String otherTag) {
17
+ this.beginTag = beginTag;
18
+ this.interTag = interTag;
19
+ this.otherTag = otherTag;
20
+ }
21
+
22
+ public abstract void serializeAnnotator(String trainingFile, String modelFile);
23
+
24
+ public abstract AnnotatedSentence annotateSentence(String sentence, boolean isTokenized);
25
+
26
+ public abstract void loadAnnotator(String modelFile);
27
+
28
+ /**
29
+ * Checks if the given target phrase is negated in the given sentence. Only
30
+ * the first word of the target phrase is used
31
+ * @param sentence
32
+ * @param isTokenized
33
+ * @param targetPhrase
34
+ * @return
35
+ */
36
+ public boolean isTargetNegated(String sentence, boolean isTokenized, String targetPhrase) {
37
+ AnnotatedSentence annotatedSentence = annotateSentence(sentence, isTokenized);
38
+ String[] targetPhraseWords = targetPhrase.split("\\s+");
39
+ List<String> words = annotatedSentence.getWords();
40
+ List<Boolean> areNegated = annotatedSentence.getIsAnnotatedTags();
41
+ int index = 0;
42
+ for (String word : words) {
43
+ if (targetPhraseWords[0].equalsIgnoreCase(word)) {
44
+ return areNegated.get(index);
45
+ }
46
+ ++index;
47
+ }
48
+ System.err.println("Phrase not found: " + targetPhrase);
49
+ return false;
50
+ }
51
+ }
@@ -0,0 +1,80 @@
1
+ package lingscope.algorithms;
2
+
3
+ import generalutils.FileOperations;
4
+ import java.util.ArrayList;
5
+ import java.util.HashSet;
6
+ import java.util.List;
7
+ import java.util.Set;
8
+ import java.util.logging.Level;
9
+ import java.util.logging.Logger;
10
+
11
+ /**
12
+ * The baseline annotator
13
+ * @author shashank
14
+ */
15
+ public abstract class BaselineAnnotator extends Annotator {
16
+
17
+ protected Set<String> phrases;
18
+
19
+ public BaselineAnnotator(String beginTag, String interTag, String otherTag) {
20
+ super(beginTag, interTag, otherTag);
21
+ phrases = null;
22
+ }
23
+
24
+ @Override
25
+ public void serializeAnnotator(String trainingFile, String modelFile) {
26
+ try {
27
+ phrases = new HashSet<String>();
28
+ List<String> taggedSentences = FileOperations.readFile(trainingFile);
29
+ for (String taggedSentence : taggedSentences) {
30
+ phrases.addAll(getCueWords(taggedSentence, beginTag, interTag, otherTag));
31
+ }
32
+ FileOperations.writeFile(modelFile, new ArrayList<String>(phrases));
33
+ } catch (Exception ex) {
34
+ Logger.getLogger(BaselineAnnotator.class.getName()).log(Level.SEVERE, null, ex);
35
+ }
36
+ }
37
+
38
+ @Override
39
+ public void loadAnnotator(String modelFile) {
40
+ try {
41
+ phrases = new HashSet<String>();
42
+ phrases.addAll(FileOperations.readFile(modelFile));
43
+ } catch (Exception ex) {
44
+ Logger.getLogger(BaselineAnnotator.class.getName()).log(Level.SEVERE, null, ex);
45
+ }
46
+ }
47
+
48
+ /**
49
+ * Gets the set of cue word phrases in the given sentence. The given
50
+ * sentence is tagged according to Abner's specifications
51
+ * @param abnerTaggedSentence sentence tagged by abner's specification.
52
+ * @param beginTag the tag to mark the beginning of the cue
53
+ * @param intermediateTag the tag to mark intermediate portions
54
+ * @param otherTag the other tag
55
+ * @return the set of cue words or phrases in the given sentence
56
+ */
57
+ public static Set<String> getCueWords(String abnerTaggedSentence, String beginTag, String intermediateTag, String otherTag) {
58
+ Set<String> cueWordsPhrases = new HashSet<String>(1);
59
+ String[] elements = abnerTaggedSentence.split(" +");
60
+ boolean collect = false;
61
+ StringBuilder collectedPhrase = new StringBuilder();
62
+ for (String element : elements) {
63
+ String[] elementTokens = element.split("\\|");
64
+ String word = elementTokens[0];
65
+ String tag = elementTokens[1];
66
+ if (tag.equalsIgnoreCase(beginTag)) {
67
+ collect = true;
68
+ collectedPhrase.append(word).append(" ");
69
+ } else if (tag.equalsIgnoreCase(intermediateTag)) {
70
+ collectedPhrase.append(word).append(" ");
71
+ } else if (tag.equalsIgnoreCase(otherTag) && collect) {
72
+ collect = false;
73
+ cueWordsPhrases.add(collectedPhrase.toString().trim().toLowerCase());
74
+ collectedPhrase.delete(0, collectedPhrase.length() - 1);
75
+ }
76
+ }
77
+ return cueWordsPhrases;
78
+ }
79
+
80
+ }
@@ -0,0 +1,84 @@
1
+ package lingscope.algorithms;
2
+
3
+ import java.util.HashSet;
4
+ import java.util.Set;
5
+ import lingscope.structures.AnnotatedSentence;
6
+
7
+ /**
8
+ *
9
+ * @author shashank
10
+ */
11
+ public class BaselineCueAnnotator extends BaselineAnnotator {
12
+
13
+ public BaselineCueAnnotator(String beginTag, String interTag, String otherTag) {
14
+ super(beginTag, interTag, otherTag);
15
+ }
16
+
17
+ @Override
18
+ public AnnotatedSentence annotateSentence(String sentence, boolean isTokenized) {
19
+ if (phrases == null) {
20
+ throw new RuntimeException("Annotator has not been loaded");
21
+ }
22
+ if (!isTokenized) {
23
+ sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
24
+ }
25
+ String lcSentence = sentence.toLowerCase();
26
+ String[] words = sentence.split(" +");
27
+ int wordsLength = words.length;
28
+
29
+ Set<Integer> addITag = new HashSet<Integer>();
30
+ Set<Integer> addBTag = new HashSet<Integer>();
31
+
32
+ // Collect all indices where beginning and intermediate tags should
33
+ // be added
34
+ for (String phrase : phrases) {
35
+ if (!lcSentence.contains(phrase)) {
36
+ continue;
37
+ }
38
+ String[] phraseWords = phrase.split(" +");
39
+
40
+ for (int wordCounter = 0; wordCounter < wordsLength; ++wordCounter) {
41
+ String word = words[wordCounter];
42
+ if (word.equalsIgnoreCase(phraseWords[0])) {
43
+ boolean phraseMatches = true;
44
+ for (int j = 0; j < phraseWords.length; ++j) {
45
+ int i = j + wordCounter;
46
+ if (i == wordsLength) {
47
+ phraseMatches = false;
48
+ break;
49
+ }
50
+ if (!phraseWords[j].equalsIgnoreCase(words[i])) {
51
+ phraseMatches = false;
52
+ break;
53
+ }
54
+ }
55
+
56
+ if (phraseMatches) {
57
+ addBTag.add(wordCounter);
58
+ for (int j = 1; j < phraseWords.length; ++j) {
59
+ addITag.add(j + wordCounter);
60
+ }
61
+ }
62
+ }
63
+ }
64
+ }
65
+
66
+ // Create a tagged sentence. Give preference to beginning tag over
67
+ // intermediate tag in case they clash
68
+ StringBuilder taggedSentence = new StringBuilder();
69
+ for (int i = 0; i < wordsLength; ++i) {
70
+ String word = words[i];
71
+ if (addBTag.contains(i)) {
72
+ taggedSentence.append(" ").append(word).append("|").append(beginTag);
73
+ } else if (addITag.contains(i)) {
74
+ taggedSentence.append(" ").append(word).append("|").append(interTag);
75
+ } else {
76
+ taggedSentence.append(" ").append(word).append("|").append(otherTag);
77
+ }
78
+ }
79
+
80
+ String raw = taggedSentence.substring(1);
81
+ return new AnnotatedSentence(raw);
82
+ }
83
+
84
+ }
@@ -0,0 +1,101 @@
1
+ package lingscope.algorithms;
2
+
3
+ import java.util.HashSet;
4
+ import java.util.Set;
5
+ import java.util.regex.Matcher;
6
+ import java.util.regex.Pattern;
7
+ import lingscope.structures.AnnotatedSentence;
8
+
9
+ /**
10
+ *
11
+ * @author shashank
12
+ */
13
+ public class BaselineScopeAnnotator extends BaselineAnnotator {
14
+
15
+ protected Pattern endPattern;
16
+
17
+ public BaselineScopeAnnotator(String beginTag, String interTag, String otherTag, boolean commaLimit, boolean periodLimit) {
18
+ super(beginTag, interTag, otherTag);
19
+ String endPatternString = "";
20
+ if (commaLimit && periodLimit) {
21
+ endPatternString = ",|\\.";
22
+ } else if (commaLimit) {
23
+ endPatternString = ",";
24
+ } else if (periodLimit) {
25
+ endPatternString = "\\.";
26
+ }
27
+ endPattern = Pattern.compile(endPatternString); // End tagging if a comma or period is seen
28
+
29
+ }
30
+
31
+ @Override
32
+ public AnnotatedSentence annotateSentence(String sentence, boolean isTokenized) {
33
+ if (phrases == null) {
34
+ throw new RuntimeException("Annotator has not been loaded");
35
+ }
36
+ if (!isTokenized) {
37
+ sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
38
+ }
39
+ String lcSentence = sentence.toLowerCase();
40
+ String[] words = sentence.split(" +");
41
+ int wordsLength = words.length;
42
+
43
+ Set<Integer> addBTag = new HashSet<Integer>();
44
+
45
+ // Collect all indices where beginning and intermediate tags should
46
+ // be added
47
+ for (String phrase : phrases) {
48
+ if (!lcSentence.contains(phrase)) {
49
+ continue;
50
+ }
51
+ String[] phraseWords = phrase.split(" +");
52
+
53
+ for (int wordCounter = 0; wordCounter < wordsLength; ++wordCounter) {
54
+ String word = words[wordCounter];
55
+ if (word.equalsIgnoreCase(phraseWords[0])) {
56
+ boolean phraseMatches = true;
57
+ for (int j = 0; j < phraseWords.length; ++j) {
58
+ int i = j + wordCounter;
59
+ if (i == wordsLength) {
60
+ phraseMatches = false;
61
+ break;
62
+ }
63
+ if (!phraseWords[j].equalsIgnoreCase(words[i])) {
64
+ phraseMatches = false;
65
+ break;
66
+ }
67
+ }
68
+
69
+ if (phraseMatches) {
70
+ addBTag.add(wordCounter);
71
+ }
72
+ }
73
+ }
74
+ }
75
+
76
+ // Create a tagged sentence. Give preference to beginning tag over
77
+ // intermediate tag in case they clash
78
+ StringBuilder taggedSentence = new StringBuilder();
79
+ boolean taggerOn = false;
80
+ for (int i = 0; i < wordsLength; ++i) {
81
+ String word = words[i];
82
+ if (addBTag.contains(i)) {
83
+ taggedSentence.append(" ").append(word).append("|").append(beginTag);
84
+ taggerOn = true;
85
+ } else if (taggerOn) {
86
+ Matcher endMatch = endPattern.matcher(word);
87
+ if (endMatch.matches()) {
88
+ taggedSentence.append(" ").append(word).append("|").append(otherTag);
89
+ taggerOn = false;
90
+ } else {
91
+ taggedSentence.append(" ").append(word).append("|").append(interTag);
92
+ }
93
+ } else {
94
+ taggedSentence.append(" ").append(word).append("|").append(otherTag);
95
+ }
96
+ }
97
+
98
+ String raw = taggedSentence.substring(1);
99
+ return new AnnotatedSentence(raw);
100
+ }
101
+ }
@@ -0,0 +1,45 @@
1
+ package lingscope.algorithms;
2
+
3
+ import abner.Tagger;
4
+ import abner.Trainer;
5
+ import java.io.File;
6
+ import lingscope.structures.AnnotatedSentence;
7
+
8
+ /**
9
+ * A CRF based annotator
10
+ * @author shashank
11
+ */
12
+ public class CrfAnnotator extends Annotator {
13
+
14
+ private Tagger tagger;
15
+
16
+ public CrfAnnotator(String beginTag, String interTag, String otherTag) {
17
+ super(beginTag, interTag, otherTag);
18
+ tagger = null;
19
+ }
20
+
21
+ @Override
22
+ public void serializeAnnotator(String trainingFile, String modelFile) {
23
+ Trainer trainer = new Trainer();
24
+ trainer.train(trainingFile, modelFile);
25
+ loadAnnotator(modelFile);
26
+ }
27
+
28
+ @Override
29
+ public AnnotatedSentence annotateSentence(String sentence, boolean isTokenized) {
30
+ if (tagger == null) {
31
+ throw new RuntimeException("Tagger has not been loaded");
32
+ }
33
+ if (!isTokenized) {
34
+ sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
35
+ }
36
+ String raw = tagger.tagABNER(sentence).trim();
37
+ return new AnnotatedSentence(raw);
38
+ }
39
+
40
+ @Override
41
+ public void loadAnnotator(String modelFile) {
42
+ tagger = new Tagger(new File(modelFile));
43
+ tagger.setTokenization(false);
44
+ }
45
+ }
@@ -0,0 +1,52 @@
1
+ /*
2
+ * To change this template, choose Tools | Templates
3
+ * and open the template in the editor.
4
+ */
5
+
6
+ package lingscope.algorithms;
7
+
8
+ import java.io.File;
9
+ import java.io.FileNotFoundException;
10
+ import java.util.ArrayList;
11
+ import java.util.Scanner;
12
+ import java.util.Set;
13
+ import java.util.logging.Level;
14
+ import java.util.logging.Logger;
15
+ import lingscope.algorithms.negex.GenNegEx;
16
+ import lingscope.structures.AnnotatedSentence;
17
+
18
+ /**
19
+ * Annotates negation using Negex
20
+ * @author shashank
21
+ */
22
+ public abstract class NegexAnnotator extends Annotator {
23
+
24
+ protected GenNegEx negex;
25
+ protected ArrayList<String> rules;
26
+
27
+ public NegexAnnotator(String beginTag, String interTag, String otherTag) {
28
+ super(beginTag, interTag, otherTag);
29
+ negex = null;
30
+ }
31
+
32
+ @Override
33
+ public void serializeAnnotator(String trainingFile, String modelFile) {
34
+ throw new UnsupportedOperationException("NegEx's serialized version can be downloaded from the internet.");
35
+ }
36
+
37
+ @Override
38
+ public void loadAnnotator(String modelFile) {
39
+ try {
40
+ negex = new GenNegEx();
41
+ File ruleFile = new File(modelFile);
42
+ Scanner sc = new Scanner(ruleFile);
43
+ rules = new ArrayList();
44
+ while (sc.hasNextLine()) {
45
+ rules.add(sc.nextLine());
46
+ }
47
+ sc.close();
48
+ } catch (FileNotFoundException ex) {
49
+ Logger.getLogger(NegexAnnotator.class.getName()).log(Level.SEVERE, null, ex);
50
+ }
51
+ }
52
+ }
@@ -0,0 +1,26 @@
1
+ package lingscope.algorithms;
2
+
3
+ import lingscope.structures.AnnotatedSentence;
4
+
5
+ /**
6
+ *
7
+ * @author shashank
8
+ */
9
+ public class NegexCueAnnotator extends NegexAnnotator {
10
+
11
+ public NegexCueAnnotator(String beginTag, String interTag, String otherTag) {
12
+ super(beginTag, interTag, otherTag);
13
+ }
14
+
15
+ @Override
16
+ public AnnotatedSentence annotateSentence(String sentence, boolean isTokenized) {
17
+ if (negex == null) {
18
+ throw new RuntimeException("Annotator has not been loaded");
19
+ }
20
+ if (!isTokenized) {
21
+ sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
22
+ }
23
+ String raw = negex.getCue(sentence, rules, beginTag, interTag, otherTag);
24
+ return new AnnotatedSentence(raw);
25
+ }
26
+ }