abstractor 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/MIT-LICENSE +20 -0
- data/README.md +14 -0
- data/Rakefile +33 -0
- data/app/assets/images/abstractor/add.png +0 -0
- data/app/assets/images/abstractor/ajax-loader.gif +0 -0
- data/app/assets/images/abstractor/bar.gif +0 -0
- data/app/assets/images/abstractor/bti_logo.jpg +0 -0
- data/app/assets/images/abstractor/bullet_arrow_down.png +0 -0
- data/app/assets/images/abstractor/bullet_arrow_up.png +0 -0
- data/app/assets/images/abstractor/cog.png +0 -0
- data/app/assets/images/abstractor/delete.png +0 -0
- data/app/assets/images/abstractor/edit.png +0 -0
- data/app/assets/images/abstractor/excel.png +0 -0
- data/app/assets/images/abstractor/favicon.ico +0 -0
- data/app/assets/images/abstractor/greencheck.gif +0 -0
- data/app/assets/images/abstractor/loading.gif +0 -0
- data/app/assets/images/abstractor/nu_logo.jpg +0 -0
- data/app/assets/images/abstractor/nubic_logo.png +0 -0
- data/app/assets/images/abstractor/page.png +0 -0
- data/app/assets/images/abstractor/rails.png +0 -0
- data/app/assets/images/abstractor/robert_h_lurie.jpg +0 -0
- data/app/assets/images/abstractor/show.png +0 -0
- data/app/assets/images/abstractor/switch_minus.gif +0 -0
- data/app/assets/images/abstractor/switch_plus.gif +0 -0
- data/app/assets/javascripts/abstractor/abstractor.js +89 -0
- data/app/assets/javascripts/abstractor/application.js +21 -0
- data/app/assets/javascripts/abstractor/combobox.js +301 -0
- data/app/assets/javascripts/abstractor/jquery/jquery.form.js +1074 -0
- data/app/assets/javascripts/abstractor/nested_attributes.js +69 -0
- data/app/assets/stylesheets/abstractor/abstractor_abstractions.css +312 -0
- data/app/assets/stylesheets/abstractor/application.css +21 -0
- data/app/assets/stylesheets/abstractor/jquery.ui.override.css.scss +73 -0
- data/app/controllers/abstractor/abstractor_abstraction_groups_controller.rb +5 -0
- data/app/controllers/abstractor/abstractor_abstractions_controller.rb +5 -0
- data/app/controllers/abstractor/abstractor_suggestions_controller.rb +5 -0
- data/app/controllers/abstractor/application_controller.rb +2 -0
- data/app/helpers/abstractor/application_helper.rb +4 -0
- data/app/models/abstractor/abstractor_abstraction.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_group.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_group_member.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_schema.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_schema_object_value.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_schema_predicate_variant.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_schema_relation.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_source.rb +6 -0
- data/app/models/abstractor/abstractor_object_type.rb +6 -0
- data/app/models/abstractor/abstractor_object_value.rb +6 -0
- data/app/models/abstractor/abstractor_object_value_variant.rb +6 -0
- data/app/models/abstractor/abstractor_relation_type.rb +6 -0
- data/app/models/abstractor/abstractor_rule_type.rb +6 -0
- data/app/models/abstractor/abstractor_subject.rb +6 -0
- data/app/models/abstractor/abstractor_subject_group.rb +6 -0
- data/app/models/abstractor/abstractor_subject_group_member.rb +6 -0
- data/app/models/abstractor/abstractor_subject_relation.rb +6 -0
- data/app/models/abstractor/abstractor_suggestion.rb +6 -0
- data/app/models/abstractor/abstractor_suggestion_object_value.rb +6 -0
- data/app/models/abstractor/abstractor_suggestion_source.rb +6 -0
- data/app/models/abstractor/abstractor_suggestion_status.rb +6 -0
- data/app/views/abstractor/abstractor_abstraction_groups/_form.html.haml +10 -0
- data/app/views/abstractor/abstractor_abstraction_groups/edit.html.haml +1 -0
- data/app/views/abstractor/abstractor_abstractions/_fields.html.haml +63 -0
- data/app/views/abstractor/abstractor_abstractions/_list.html.haml +45 -0
- data/app/views/abstractor/abstractor_abstractions/edit.html.haml +53 -0
- data/app/views/abstractor/abstractor_abstractions/show.html.haml +1 -0
- data/app/views/abstractor/shared/_error_messages.html.haml +5 -0
- data/config/cucumber.yml +8 -0
- data/config/routes.rb +7 -0
- data/db/migrate/20131227205140_create_abstractor_object_types.rb +10 -0
- data/db/migrate/20131227205219_create_abstractor_object_values.rb +10 -0
- data/db/migrate/20131227205256_create_abstractor_object_value_variants.rb +11 -0
- data/db/migrate/20131227205320_create_abstractor_relation_types.rb +10 -0
- data/db/migrate/20131227205354_create_abstractor_rule_types.rb +11 -0
- data/db/migrate/20131227205432_create_abstractor_abstraction_schemas.rb +13 -0
- data/db/migrate/20131227205456_create_abstractor_abstraction_schema_object_values.rb +10 -0
- data/db/migrate/20131227205529_create_abstractor_abstraction_schema_predicate_variants.rb +11 -0
- data/db/migrate/20131227205610_create_abstractor_abstraction_schema_relations.rb +11 -0
- data/db/migrate/20131227205652_create_abstractor_subjects.rb +12 -0
- data/db/migrate/20131227205732_create_abstractor_subject_relations.rb +11 -0
- data/db/migrate/20131227205831_create_abstractor_abstraction_sources.rb +11 -0
- data/db/migrate/20131227210211_create_abstractor_subject_groups.rb +10 -0
- data/db/migrate/20131227210244_create_abstractor_subject_group_members.rb +11 -0
- data/db/migrate/20131227210350_create_abstractor_abstraction_groups.rb +12 -0
- data/db/migrate/20131227210353_create_abstractor_abstraction_group_members.rb +11 -0
- data/db/migrate/20131227211050_create_abstractor_suggestion_statuses.rb +10 -0
- data/db/migrate/20131227211303_create_abstractor_abstractions.rb +15 -0
- data/db/migrate/20131227213427_create_abstractor_suggestions.rb +14 -0
- data/db/migrate/20131228041944_create_abstractor_suggestion_object_values.rb +11 -0
- data/db/migrate/20131228041945_create_abstractor_suggestion_sources.rb +16 -0
- data/db/seeds.rb +0 -0
- data/lib/abstractor.rb +8 -0
- data/lib/abstractor/abstractable.rb +190 -0
- data/lib/abstractor/core_ext/string.rb +99 -0
- data/lib/abstractor/engine.rb +14 -0
- data/lib/abstractor/methods/controllers/abstractor_abstraction_groups_controller.rb +37 -0
- data/lib/abstractor/methods/controllers/abstractor_abstractions_controller.rb +42 -0
- data/lib/abstractor/methods/controllers/abstractor_suggestions_controller.rb +28 -0
- data/lib/abstractor/methods/models/abstractor_abstraction.rb +65 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_group.rb +37 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_group_member.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_schema.rb +27 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_schema_object_value.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_schema_predicate_variant.rb +16 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_schema_relation.rb +18 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_source.rb +29 -0
- data/lib/abstractor/methods/models/abstractor_object_type.rb +16 -0
- data/lib/abstractor/methods/models/abstractor_object_value.rb +24 -0
- data/lib/abstractor/methods/models/abstractor_object_value_variant.rb +16 -0
- data/lib/abstractor/methods/models/abstractor_relation_type.rb +16 -0
- data/lib/abstractor/methods/models/abstractor_rule_type.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_subject.rb +258 -0
- data/lib/abstractor/methods/models/abstractor_subject_group.rb +19 -0
- data/lib/abstractor/methods/models/abstractor_subject_group_member.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_subject_relation.rb +18 -0
- data/lib/abstractor/methods/models/abstractor_suggestion.rb +88 -0
- data/lib/abstractor/methods/models/abstractor_suggestion_object_value.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_suggestion_source.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_suggestion_status.rb +29 -0
- data/lib/abstractor/methods/models/soft_delete.rb +35 -0
- data/lib/abstractor/negation_detection.rb +43 -0
- data/lib/abstractor/parser.rb +76 -0
- data/lib/abstractor/setup.rb +24 -0
- data/lib/abstractor/user_interface.rb +40 -0
- data/lib/abstractor/utility.rb +8 -0
- data/lib/abstractor/version.rb +3 -0
- data/lib/generators/abstractor/install/install_generator.rb +118 -0
- data/lib/generators/abstractor/install/templates/controllers/abstractor_abstraction_groups_controller.rb +16 -0
- data/lib/generators/abstractor/install/templates/controllers/abstractor_abstractions_controller.rb +24 -0
- data/lib/generators/abstractor/install/templates/controllers/abstractor_suggestions_controller.rb +12 -0
- data/lib/generators/abstractor/install/templates/dictionaries.yml +235 -0
- data/lib/generators/abstractor/install/templates/helpers/abstractions_helper.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group_member.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_object_value.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_predicate_variant.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_relation.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_source.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_object_type.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_object_value.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_object_value_variant.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_relation_type.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_rule_type.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_subject.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_subject_group.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_subject_group_member.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_subject_relation.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_suggestion.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_object_value.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_source.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_status.rb +9 -0
- data/lib/lingscope/build.xml +74 -0
- data/lib/lingscope/build/built-jar.properties +4 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/AbnerTokenizer.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/AnnotationComparer.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/Annotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/BaselineAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/BaselineCueAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/BaselineScopeAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/CrfAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/NegexAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/NegexCueAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/NegexScopeAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/PosTagger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/negex/GenNegEx.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/negex/Sorter.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/AnnotatedFilesMerger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/AnnotationComparerDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/BaselineDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/CrfDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/CueAndPosFilesMerger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/ModelTrainer.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/NegexDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/PosTaggerDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/SentencePosTagger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/SentenceTagger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/io/AnnotatedSentencesIO.class +0 -0
- data/lib/lingscope/build/classes/lingscope/structures/AnnotatedSentence.class +0 -0
- data/lib/lingscope/dist/README.TXT +32 -0
- data/lib/lingscope/dist/javadoc/allclasses-frame.html +80 -0
- data/lib/lingscope/dist/javadoc/allclasses-noframe.html +80 -0
- data/lib/lingscope/dist/javadoc/constant-values.html +199 -0
- data/lib/lingscope/dist/javadoc/deprecated-list.html +147 -0
- data/lib/lingscope/dist/javadoc/help-doc.html +224 -0
- data/lib/lingscope/dist/javadoc/index-files/index-1.html +188 -0
- data/lib/lingscope/dist/javadoc/index-files/index-10.html +149 -0
- data/lib/lingscope/dist/javadoc/index-files/index-11.html +158 -0
- data/lib/lingscope/dist/javadoc/index-files/index-12.html +157 -0
- data/lib/lingscope/dist/javadoc/index-files/index-13.html +177 -0
- data/lib/lingscope/dist/javadoc/index-files/index-14.html +155 -0
- data/lib/lingscope/dist/javadoc/index-files/index-15.html +152 -0
- data/lib/lingscope/dist/javadoc/index-files/index-16.html +146 -0
- data/lib/lingscope/dist/javadoc/index-files/index-2.html +158 -0
- data/lib/lingscope/dist/javadoc/index-files/index-3.html +165 -0
- data/lib/lingscope/dist/javadoc/index-files/index-4.html +146 -0
- data/lib/lingscope/dist/javadoc/index-files/index-5.html +219 -0
- data/lib/lingscope/dist/javadoc/index-files/index-6.html +149 -0
- data/lib/lingscope/dist/javadoc/index-files/index-7.html +155 -0
- data/lib/lingscope/dist/javadoc/index-files/index-8.html +185 -0
- data/lib/lingscope/dist/javadoc/index-files/index-9.html +164 -0
- data/lib/lingscope/dist/javadoc/index.html +74 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/AbnerTokenizer.html +280 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/AnnotationComparer.html +526 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/Annotator.html +401 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineAnnotator.html +375 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineCueAnnotator.html +309 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineScopeAnnotator.html +340 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/CrfAnnotator.html +340 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexAnnotator.html +364 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexCueAnnotator.html +309 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexScopeAnnotator.html +309 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/PosTagger.html +268 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AbnerTokenizer.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AnnotationComparer.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/Annotator.html +299 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineAnnotator.html +189 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineCueAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineScopeAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/CrfAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexAnnotator.html +189 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexCueAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexScopeAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/PosTagger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/GenNegEx.html +369 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/Sorter.html +253 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/GenNegEx.html +181 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/Sorter.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-frame.html +35 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-summary.html +165 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-tree.html +154 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-use.html +174 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-frame.html +53 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-summary.html +198 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-tree.html +160 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-use.html +202 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotatedFilesMerger.html +284 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotationComparerDriver.html +258 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/BaselineDriver.html +260 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/CrfDriver.html +262 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/CueAndPosFilesMerger.html +310 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/ModelTrainer.html +281 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/NegexDriver.html +262 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/PosTaggerDriver.html +289 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/SentencePosTagger.html +313 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/SentenceTagger.html +512 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotatedFilesMerger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotationComparerDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/BaselineDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CrfDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CueAndPosFilesMerger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/ModelTrainer.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/NegexDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/PosTaggerDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentencePosTagger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentenceTagger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/package-frame.html +51 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/package-summary.html +195 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/package-tree.html +154 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/package-use.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/AnnotatedSentencesIO.html +285 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/class-use/AnnotatedSentencesIO.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/package-frame.html +33 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/package-summary.html +158 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/package-tree.html +154 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/package-use.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/AnnotatedSentence.html +425 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/class-use/AnnotatedSentence.html +423 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/package-frame.html +33 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/package-summary.html +158 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/package-tree.html +154 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/package-use.html +209 -0
- data/lib/lingscope/dist/javadoc/overview-frame.html +51 -0
- data/lib/lingscope/dist/javadoc/overview-summary.html +168 -0
- data/lib/lingscope/dist/javadoc/overview-tree.html +159 -0
- data/lib/lingscope/dist/javadoc/package-list +5 -0
- data/lib/lingscope/dist/javadoc/resources/inherit.gif +0 -0
- data/lib/lingscope/dist/javadoc/stylesheet.css +29 -0
- data/lib/lingscope/dist/lib/abner.jar +0 -0
- data/lib/lingscope/dist/lib/commons-lang-2.4.jar +0 -0
- data/lib/lingscope/dist/lib/java_utils.jar +0 -0
- data/lib/lingscope/dist/lib/sptoolkit.jar +0 -0
- data/lib/lingscope/dist/lib/stanford-postagger.jar +0 -0
- data/lib/lingscope/dist/lingscope.jar +0 -0
- data/lib/lingscope/lingscope.zip +0 -0
- data/lib/lingscope/manifest.mf +3 -0
- data/lib/lingscope/nbproject/build-impl.xml +1338 -0
- data/lib/lingscope/nbproject/genfiles.properties +8 -0
- data/lib/lingscope/nbproject/private/config.properties +0 -0
- data/lib/lingscope/nbproject/private/private.properties +10 -0
- data/lib/lingscope/nbproject/private/private.xml +5 -0
- data/lib/lingscope/nbproject/project.properties +85 -0
- data/lib/lingscope/nbproject/project.xml +15 -0
- data/lib/lingscope/negation_models.zip +0 -0
- data/lib/lingscope/negation_models/baseline_cue_all_both.model +49 -0
- data/lib/lingscope/negation_models/baseline_cue_all_clinical.model +19 -0
- data/lib/lingscope/negation_models/crf_cue_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_cue_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_cue_crf_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_cue_crf_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_cue_regex_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_cue_regex_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_crf_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_crf_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_regex_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_regex_all_clinical.model +0 -0
- data/lib/lingscope/src/lingscope/algorithms/AbnerTokenizer.java +58 -0
- data/lib/lingscope/src/lingscope/algorithms/AnnotationComparer.java +207 -0
- data/lib/lingscope/src/lingscope/algorithms/Annotator.java +51 -0
- data/lib/lingscope/src/lingscope/algorithms/BaselineAnnotator.java +80 -0
- data/lib/lingscope/src/lingscope/algorithms/BaselineCueAnnotator.java +84 -0
- data/lib/lingscope/src/lingscope/algorithms/BaselineScopeAnnotator.java +101 -0
- data/lib/lingscope/src/lingscope/algorithms/CrfAnnotator.java +45 -0
- data/lib/lingscope/src/lingscope/algorithms/NegexAnnotator.java +52 -0
- data/lib/lingscope/src/lingscope/algorithms/NegexCueAnnotator.java +26 -0
- data/lib/lingscope/src/lingscope/algorithms/NegexScopeAnnotator.java +26 -0
- data/lib/lingscope/src/lingscope/algorithms/PosTagger.java +54 -0
- data/lib/lingscope/src/lingscope/algorithms/negex/GenNegEx.java +530 -0
- data/lib/lingscope/src/lingscope/algorithms/negex/Sorter.java +36 -0
- data/lib/lingscope/src/lingscope/drivers/AnnotatedFilesMerger.java +61 -0
- data/lib/lingscope/src/lingscope/drivers/AnnotationComparerDriver.java +22 -0
- data/lib/lingscope/src/lingscope/drivers/BaselineDriver.java +45 -0
- data/lib/lingscope/src/lingscope/drivers/CrfDriver.java +31 -0
- data/lib/lingscope/src/lingscope/drivers/CueAndPosFilesMerger.java +86 -0
- data/lib/lingscope/src/lingscope/drivers/ModelTrainer.java +39 -0
- data/lib/lingscope/src/lingscope/drivers/NegexDriver.java +32 -0
- data/lib/lingscope/src/lingscope/drivers/PosTaggerDriver.java +62 -0
- data/lib/lingscope/src/lingscope/drivers/SentencePosTagger.java +89 -0
- data/lib/lingscope/src/lingscope/drivers/SentenceTagger.java +158 -0
- data/lib/lingscope/src/lingscope/io/AnnotatedSentencesIO.java +53 -0
- data/lib/lingscope/src/lingscope/structures/AnnotatedSentence.java +105 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.csv +1 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.txt +2273 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.csv +1 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.txt +1342 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/icd-o3 readme.txt +21 -0
- data/lib/setup/data/custom_site_synonyms.csv +1 -0
- data/lib/setup/data/diagnosis_categorizations.csv +1 -0
- data/lib/setup/data/high_level_primary_cns_diagnosis_categorizations.yml +597 -0
- data/lib/setup/data/icdo3_sites_with_laterality.txt +11 -0
- data/lib/setup/data/site_site_categories.txt +28 -0
- data/lib/tasks/abstractor_tasks.rake +22 -0
- data/lib/tasks/cucumber.rake +65 -0
- metadata +754 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
package lingscope.algorithms;
|
|
2
|
+
|
|
3
|
+
import lingscope.structures.AnnotatedSentence;
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
*
|
|
7
|
+
* @author shashank
|
|
8
|
+
*/
|
|
9
|
+
public class NegexScopeAnnotator extends NegexAnnotator {
|
|
10
|
+
|
|
11
|
+
public NegexScopeAnnotator(String beginTag, String interTag, String otherTag) {
|
|
12
|
+
super(beginTag, interTag, otherTag);
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
@Override
|
|
16
|
+
public AnnotatedSentence annotateSentence(String sentence, boolean isTokenized) {
|
|
17
|
+
if (negex == null) {
|
|
18
|
+
throw new RuntimeException("Annotator has not been loaded");
|
|
19
|
+
}
|
|
20
|
+
if (!isTokenized) {
|
|
21
|
+
sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
|
|
22
|
+
}
|
|
23
|
+
String raw = negex.getScope(sentence, rules, beginTag, interTag, otherTag);
|
|
24
|
+
return new AnnotatedSentence(raw);
|
|
25
|
+
}
|
|
26
|
+
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
package lingscope.algorithms;
|
|
2
|
+
|
|
3
|
+
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
|
|
4
|
+
import java.util.ArrayList;
|
|
5
|
+
import java.util.List;
|
|
6
|
+
import java.util.logging.Level;
|
|
7
|
+
import java.util.logging.Logger;
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* Part of speech tagger
|
|
11
|
+
* @author shashank
|
|
12
|
+
*/
|
|
13
|
+
public class PosTagger {
|
|
14
|
+
|
|
15
|
+
private MaxentTagger posTagger;
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Creates an instance of POS tagger by loading the given grammar file
|
|
19
|
+
* @param grammarFile
|
|
20
|
+
*/
|
|
21
|
+
public PosTagger(String grammarFile) {
|
|
22
|
+
try {
|
|
23
|
+
posTagger = new MaxentTagger(grammarFile);
|
|
24
|
+
} catch (Exception ex) {
|
|
25
|
+
Logger.getLogger(PosTagger.class.getName()).log(Level.SEVERE, null, ex);
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Takes a sentence as input and returns list of POS tags associated with
|
|
31
|
+
* each word in the sentence
|
|
32
|
+
* @param sentence
|
|
33
|
+
* @param isTokenized
|
|
34
|
+
* @return
|
|
35
|
+
*/
|
|
36
|
+
public List<String> replaceWordsWithPos(String sentence, boolean isTokenized) {
|
|
37
|
+
if (!isTokenized) {
|
|
38
|
+
sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
|
|
39
|
+
}
|
|
40
|
+
List<String> ret = new ArrayList<String>();
|
|
41
|
+
String tagged = "";
|
|
42
|
+
try {
|
|
43
|
+
tagged = posTagger.tagString(sentence);
|
|
44
|
+
} catch (Exception ex) {
|
|
45
|
+
Logger.getLogger(PosTagger.class.getName()).log(Level.SEVERE, null, ex);
|
|
46
|
+
}
|
|
47
|
+
for (String wordTag : tagged.split(" +")) {
|
|
48
|
+
String[] tags = wordTag.split("/");
|
|
49
|
+
String tag = tags[tags.length - 1];
|
|
50
|
+
ret.add(tag);
|
|
51
|
+
}
|
|
52
|
+
return ret;
|
|
53
|
+
}
|
|
54
|
+
}
|
|
@@ -0,0 +1,530 @@
|
|
|
1
|
+
package lingscope.algorithms.negex;
|
|
2
|
+
|
|
3
|
+
import java.util.regex.Matcher;
|
|
4
|
+
import java.util.regex.Pattern;
|
|
5
|
+
import java.util.*;
|
|
6
|
+
|
|
7
|
+
/***************************************************************************************
|
|
8
|
+
* Author: Imre Solti
|
|
9
|
+
* Date: 09/15/2008
|
|
10
|
+
* Modified: 04/15/2009
|
|
11
|
+
* Changed to specifications of test kit and discussions with WC and PH.
|
|
12
|
+
* Modified: 04/26/2009
|
|
13
|
+
* Fixed the deletion of last character in scope of PREN, PREP negation scopes.
|
|
14
|
+
*
|
|
15
|
+
* Wendy Chapman's NegEx algorithm in Java.
|
|
16
|
+
*
|
|
17
|
+
* Sentence boundaries serve as WINDOW for negation (suggested by Wendy Chapman)
|
|
18
|
+
*
|
|
19
|
+
****************************************************************************************/
|
|
20
|
+
|
|
21
|
+
/*
|
|
22
|
+
Copyright 2008 Imre Solti
|
|
23
|
+
|
|
24
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
25
|
+
|
|
26
|
+
you may not use this file except in compliance with the License. You may obtain a copy of the License at
|
|
27
|
+
|
|
28
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
29
|
+
|
|
30
|
+
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
|
|
31
|
+
WITHOUT
|
|
32
|
+
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and
|
|
33
|
+
limitations under the License.
|
|
34
|
+
*/
|
|
35
|
+
public class GenNegEx {
|
|
36
|
+
|
|
37
|
+
private String previousRunScope;
|
|
38
|
+
private String[] sentenceWords;
|
|
39
|
+
private String[] tags;
|
|
40
|
+
|
|
41
|
+
public String getCue(String sentenceString, List<String> ruleStrings, String beginTag, String interTag, String otherTag) {
|
|
42
|
+
tagNegation(sentenceString, ruleStrings);
|
|
43
|
+
StringBuilder ret = new StringBuilder();
|
|
44
|
+
boolean inScope = false;
|
|
45
|
+
for (int i = 0; i < sentenceWords.length; ++i) {
|
|
46
|
+
String tag = tags[i];
|
|
47
|
+
String sentenceWord = sentenceWords[i];
|
|
48
|
+
ret.append(" ").append(sentenceWord).append("|");
|
|
49
|
+
if (tag.equalsIgnoreCase("cue")) {
|
|
50
|
+
if (inScope) {
|
|
51
|
+
ret.append(interTag);
|
|
52
|
+
} else {
|
|
53
|
+
ret.append(beginTag);
|
|
54
|
+
inScope = true;
|
|
55
|
+
}
|
|
56
|
+
} else {
|
|
57
|
+
ret.append(otherTag);
|
|
58
|
+
inScope = false;
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
return ret.substring(1);
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
public String getScope(String sentenceString, List<String> ruleStrings, String beginTag, String interTag, String otherTag) {
|
|
65
|
+
tagNegation(sentenceString, ruleStrings);
|
|
66
|
+
StringBuilder ret = new StringBuilder();
|
|
67
|
+
boolean inScope = false;
|
|
68
|
+
Pattern punct = Pattern.compile("\\p{Punct}");
|
|
69
|
+
for (int i = 0; i < sentenceWords.length; ++i) {
|
|
70
|
+
String tag = tags[i];
|
|
71
|
+
String sentenceWord = sentenceWords[i];
|
|
72
|
+
ret.append(" ").append(sentenceWord).append("|");
|
|
73
|
+
Matcher m = punct.matcher(sentenceWord);
|
|
74
|
+
if (m.matches()) {
|
|
75
|
+
if (i + 1 == sentenceWords.length) {
|
|
76
|
+
ret.append(otherTag);
|
|
77
|
+
continue;
|
|
78
|
+
} else if (tags[i + 1].equalsIgnoreCase("other")) {
|
|
79
|
+
ret.append(otherTag);
|
|
80
|
+
continue;
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
if (tag.equalsIgnoreCase("cue") || tag.equalsIgnoreCase("scope")) {
|
|
84
|
+
if (inScope) {
|
|
85
|
+
ret.append(interTag);
|
|
86
|
+
} else {
|
|
87
|
+
ret.append(beginTag);
|
|
88
|
+
inScope = true;
|
|
89
|
+
}
|
|
90
|
+
} else {
|
|
91
|
+
ret.append(otherTag);
|
|
92
|
+
inScope = false;
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
return ret.substring(1);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
public void tagNegation(String sentenceString, List<String> ruleStrings) {
|
|
99
|
+
Sorter s = new Sorter();
|
|
100
|
+
sentenceWords = sentenceString.split("\\s+");
|
|
101
|
+
tags = new String[sentenceWords.length];
|
|
102
|
+
for (int i = 0; i < sentenceWords.length; ++i) {
|
|
103
|
+
tags[i] = "other";
|
|
104
|
+
}
|
|
105
|
+
String sToReturn = "";
|
|
106
|
+
String sScope = "";
|
|
107
|
+
List<String> sortedRules = new ArrayList<String>();
|
|
108
|
+
|
|
109
|
+
String filler = "__";
|
|
110
|
+
boolean negPoss = false;
|
|
111
|
+
|
|
112
|
+
// Sort the rules by length in descending order.
|
|
113
|
+
// Rules need to be sorted so the longest rule is always tried to match
|
|
114
|
+
// first.
|
|
115
|
+
// Some of the rules overlap so without sorting first shorter rules (some of them POSSIBLE or PSEUDO)
|
|
116
|
+
// would match before longer legitimate negation rules.
|
|
117
|
+
//
|
|
118
|
+
|
|
119
|
+
// There is efficiency issue here. It is better if rules are sorted by the
|
|
120
|
+
// calling program once and used without sorting in GennegEx.
|
|
121
|
+
sortedRules = s.sortRules(ruleStrings);
|
|
122
|
+
|
|
123
|
+
// Process the sentence and tag each matched negation
|
|
124
|
+
// rule with correct negation rule tag.
|
|
125
|
+
//
|
|
126
|
+
// At the same time check for the phrase that we want to decide
|
|
127
|
+
// the negation status for and
|
|
128
|
+
// tag the phrase with [PHRASE] ... [PHRASE]
|
|
129
|
+
// In both the negation rules and in the phrase replace white space
|
|
130
|
+
// with "filler" string. (This could cause problems if the sentences
|
|
131
|
+
// we study has "filler" on their own.)
|
|
132
|
+
|
|
133
|
+
// Sentence needs one character in the beginning and end to match.
|
|
134
|
+
// We remove the extra characters after processing.
|
|
135
|
+
String sentence = "." + sentenceString + ".";
|
|
136
|
+
|
|
137
|
+
// Tag the phrases we want to detect for negation.
|
|
138
|
+
// Should happen before rule detection.
|
|
139
|
+
|
|
140
|
+
Iterator<String> iRule = sortedRules.iterator();
|
|
141
|
+
while (iRule.hasNext()) {
|
|
142
|
+
String rule = iRule.next();
|
|
143
|
+
Pattern p = Pattern.compile("[\\t]+"); // Working.
|
|
144
|
+
String[] ruleTokens = p.split(rule.trim());
|
|
145
|
+
String[] ruleMembers = ruleTokens[0].trim().split(" ");
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
// Add the regular expression characters to tokens and asemble the rule again.
|
|
149
|
+
String rule2 = "";
|
|
150
|
+
for (int i = 0; i < ruleMembers.length; i++) {
|
|
151
|
+
if (!ruleMembers[i].equals("")) {
|
|
152
|
+
if (ruleMembers.length == 1) {
|
|
153
|
+
rule2 = ruleMembers[i];
|
|
154
|
+
} else {
|
|
155
|
+
rule2 = rule2 + ruleMembers[i].trim() + "\\s+";
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
// Remove the last s+
|
|
160
|
+
if (rule2.endsWith("\\s+")) {
|
|
161
|
+
rule2 = rule2.substring(0, rule2.lastIndexOf("\\s+"));
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
rule2 = "(?m)(?i)[[\\p{Punct}&&[^-_\\]\\[]]|\\s+](" + rule2 + ")[[\\p{Punct}&&[^_]]|\\s+]";
|
|
165
|
+
// rule2 = "(?m)(?i)[[\\p{Punct}&&[^\\]\\[]]|\\s+](" + rule2 + ")[[\\p{Punct}&&[^_]]|\\s+]";
|
|
166
|
+
|
|
167
|
+
Pattern p2 = Pattern.compile(rule2.trim());
|
|
168
|
+
Matcher m = p2.matcher(sentence);
|
|
169
|
+
Matcher m2 = p2.matcher(sentence);
|
|
170
|
+
if (m2.find()) {
|
|
171
|
+
/**
|
|
172
|
+
* Only PREN and POST rules matter
|
|
173
|
+
* added by Shashank
|
|
174
|
+
*/
|
|
175
|
+
if (ruleTokens[1].equalsIgnoreCase("[PREN]") || ruleTokens[1].equalsIgnoreCase("[POST]")) {
|
|
176
|
+
for (int i = 0; i < sentenceWords.length; ++i) {
|
|
177
|
+
boolean perfectMatch = true;
|
|
178
|
+
for (int j = 0; j < ruleMembers.length; ++j) {
|
|
179
|
+
if (!sentenceWords[i + j].equalsIgnoreCase(ruleMembers[j])) {
|
|
180
|
+
perfectMatch = false;
|
|
181
|
+
break;
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
if (perfectMatch) {
|
|
185
|
+
for (int j = 0; j < ruleMembers.length; ++j) {
|
|
186
|
+
tags[i + j] = "cue";
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
while (m.find() == true) {
|
|
194
|
+
sentence = m.replaceAll(" " + ruleTokens[1].trim() + m.group().trim().replaceAll(" ", filler) + ruleTokens[1].trim() + " ");
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
// Exchange the [PHRASE] ... [PHRASE] tags for [NEGATED] ... [NEGATED]
|
|
200
|
+
// based of PREN, POST rules and if flag is set to true
|
|
201
|
+
// then based on PREP and POSP, as well.
|
|
202
|
+
|
|
203
|
+
// Because PRENEGATION [PREN} is checked first it takes precedent over
|
|
204
|
+
// POSTNEGATION [POST].
|
|
205
|
+
// Similarly POSTNEGATION [POST] takes precedent over POSSIBLE PRENEGATION [PREP]
|
|
206
|
+
// and [PREP] takes precedent over POSSIBLE POSTNEGATION [POSP].
|
|
207
|
+
|
|
208
|
+
String splitPattern = "\\s+|[^\\s]__";
|
|
209
|
+
Pattern pSpace = Pattern.compile(splitPattern);
|
|
210
|
+
String[] sentenceTokens = pSpace.split(sentence.trim());
|
|
211
|
+
StringBuilder sb = new StringBuilder();
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
// Check for [PREN]
|
|
215
|
+
for (int i = 0; i < sentenceTokens.length; i++) {
|
|
216
|
+
sb.append(" ").append(sentenceTokens[i].trim());
|
|
217
|
+
if (sentenceTokens[i].trim().startsWith("[PREN]")) {
|
|
218
|
+
|
|
219
|
+
for (int j = i + 1; j < sentenceTokens.length; j++) {
|
|
220
|
+
if (sentenceTokens[j].trim().startsWith("[CONJ]")
|
|
221
|
+
|| sentenceTokens[j].trim().startsWith("[PSEU]")
|
|
222
|
+
|| sentenceTokens[j].trim().startsWith("[POST]")
|
|
223
|
+
|| sentenceTokens[j].trim().startsWith("[PREP]")
|
|
224
|
+
|| sentenceTokens[j].trim().startsWith("[POSP]")) {
|
|
225
|
+
break;
|
|
226
|
+
}
|
|
227
|
+
if (!tags[j].equalsIgnoreCase("cue")) {
|
|
228
|
+
tags[j] = "scope";
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
sentence = sb.toString().trim();
|
|
235
|
+
pSpace = Pattern.compile(splitPattern);
|
|
236
|
+
sentenceTokens = pSpace.split(sentence);
|
|
237
|
+
StringBuilder sb2 = new StringBuilder();
|
|
238
|
+
|
|
239
|
+
// Check for [POST]
|
|
240
|
+
for (int i = sentenceTokens.length - 1; i > 0; i--) {
|
|
241
|
+
sb2.insert(0, sentenceTokens[i] + " ");
|
|
242
|
+
if (sentenceTokens[i].trim().startsWith("[POST]")) {
|
|
243
|
+
for (int j = i - 1; j > 0; j--) {
|
|
244
|
+
if (sentenceTokens[j].trim().startsWith("[CONJ]")
|
|
245
|
+
|| sentenceTokens[j].trim().startsWith("[PSEU]")
|
|
246
|
+
|| sentenceTokens[j].trim().startsWith("[PREN]")
|
|
247
|
+
|| sentenceTokens[j].trim().startsWith("[PREP]")
|
|
248
|
+
|| sentenceTokens[j].trim().startsWith("[POSP]")) {
|
|
249
|
+
break;
|
|
250
|
+
}
|
|
251
|
+
if (!tags[j].equalsIgnoreCase("cue")) {
|
|
252
|
+
tags[j] = "scope";
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
public String negCheck(String sentenceString, String phraseString, ArrayList ruleStrings,
|
|
260
|
+
boolean negatePossible) throws Exception {
|
|
261
|
+
|
|
262
|
+
Sorter s = new Sorter();
|
|
263
|
+
String sToReturn = "";
|
|
264
|
+
String sScope = "";
|
|
265
|
+
String sentencePortion = "";
|
|
266
|
+
|
|
267
|
+
String filler = "_";
|
|
268
|
+
boolean negPoss = negatePossible;
|
|
269
|
+
boolean negationScope = true;
|
|
270
|
+
|
|
271
|
+
// Sort the rules by length in descending order.
|
|
272
|
+
// Rules need to be sorted so the longest rule is always tried to match
|
|
273
|
+
// first.
|
|
274
|
+
// Some of the rules overlap so without sorting first shorter rules (some of them POSSIBLE or PSEUDO)
|
|
275
|
+
// would match before longer legitimate negation rules.
|
|
276
|
+
//
|
|
277
|
+
|
|
278
|
+
// There is efficiency issue here. It is better if rules are sorted by the
|
|
279
|
+
// calling program once and used without sorting in GennegEx.
|
|
280
|
+
List<String> sortedRules = s.sortRules(ruleStrings);
|
|
281
|
+
|
|
282
|
+
// Process the sentence and tag each matched negation
|
|
283
|
+
// rule with correct negation rule tag.
|
|
284
|
+
//
|
|
285
|
+
// At the same time check for the phrase that we want to decide
|
|
286
|
+
// the negation status for and
|
|
287
|
+
// tag the phrase with [PHRASE] ... [PHRASE]
|
|
288
|
+
// In both the negation rules and in the phrase replace white space
|
|
289
|
+
// with "filler" string. (This could cause problems if the sentences
|
|
290
|
+
// we study has "filler" on their own.)
|
|
291
|
+
|
|
292
|
+
// Sentence needs one character in the beginning and end to match.
|
|
293
|
+
// We remove the extra characters after processing.
|
|
294
|
+
String sentence = "." + sentenceString + ".";
|
|
295
|
+
|
|
296
|
+
// Tag the phrases we want to detect for negation.
|
|
297
|
+
// Should happen before rule detection.
|
|
298
|
+
String phrase = phraseString;
|
|
299
|
+
Pattern pph = Pattern.compile(phrase.trim(), Pattern.CASE_INSENSITIVE);
|
|
300
|
+
Matcher mph = pph.matcher(sentence);
|
|
301
|
+
|
|
302
|
+
while (mph.find() == true) {
|
|
303
|
+
sentence = mph.replaceAll(" [PHRASE]" + mph.group().trim().replaceAll(" ", filler) + "[PHRASE]");
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
Iterator iRule = sortedRules.iterator();
|
|
307
|
+
while (iRule.hasNext()) {
|
|
308
|
+
String rule = (String) iRule.next();
|
|
309
|
+
Pattern p = Pattern.compile("[\\t]+"); // Working.
|
|
310
|
+
String[] ruleTokens = p.split(rule.trim());
|
|
311
|
+
// Add the regular expression characters to tokens and asemble the rule again.
|
|
312
|
+
String[] ruleMembers = ruleTokens[0].trim().split(" ");
|
|
313
|
+
String rule2 = "";
|
|
314
|
+
for (int i = 0; i <= ruleMembers.length - 1; i++) {
|
|
315
|
+
if (!ruleMembers[i].equals("")) {
|
|
316
|
+
if (ruleMembers.length == 1) {
|
|
317
|
+
rule2 = ruleMembers[i];
|
|
318
|
+
} else {
|
|
319
|
+
rule2 = rule2 + ruleMembers[i].trim() + "\\s+";
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
// Remove the last s+
|
|
324
|
+
if (rule2.endsWith("\\s+")) {
|
|
325
|
+
rule2 = rule2.substring(0, rule2.lastIndexOf("\\s+"));
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
rule2 = "(?m)(?i)[[\\p{Punct}&&[^\\]\\[]]|\\s+](" + rule2 + ")[[\\p{Punct}&&[^_]]|\\s+]";
|
|
329
|
+
|
|
330
|
+
Pattern p2 = Pattern.compile(rule2.trim());
|
|
331
|
+
Matcher m = p2.matcher(sentence);
|
|
332
|
+
|
|
333
|
+
while (m.find() == true) {
|
|
334
|
+
sentence = m.replaceAll(" " + ruleTokens[1].trim() + m.group().trim().replaceAll(" ", filler) + ruleTokens[1].trim() + " ");
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
// Exchange the [PHRASE] ... [PHRASE] tags for [NEGATED] ... [NEGATED]
|
|
340
|
+
// based of PREN, POST rules and if flag is set to true
|
|
341
|
+
// then based on PREP and POSP, as well.
|
|
342
|
+
|
|
343
|
+
// Because PRENEGATION [PREN} is checked first it takes precedent over
|
|
344
|
+
// POSTNEGATION [POST].
|
|
345
|
+
// Similarly POSTNEGATION [POST] takes precedent over POSSIBLE PRENEGATION [PREP]
|
|
346
|
+
// and [PREP] takes precedent over POSSIBLE POSTNEGATION [POSP].
|
|
347
|
+
|
|
348
|
+
Pattern pSpace = Pattern.compile("[\\s+]");
|
|
349
|
+
String[] sentenceTokens = pSpace.split(sentence);
|
|
350
|
+
StringBuilder sb = new StringBuilder();
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
// Check for [PREN]
|
|
354
|
+
for (int i = 0; i < sentenceTokens.length; i++) {
|
|
355
|
+
sb.append(" " + sentenceTokens[i].trim());
|
|
356
|
+
if (sentenceTokens[i].trim().startsWith("[PREN]")) {
|
|
357
|
+
|
|
358
|
+
for (int j = i + 1; j < sentenceTokens.length; j++) {
|
|
359
|
+
if (sentenceTokens[j].trim().startsWith("[CONJ]")
|
|
360
|
+
|| sentenceTokens[j].trim().startsWith("[PSEU]")
|
|
361
|
+
|| sentenceTokens[j].trim().startsWith("[POST]")
|
|
362
|
+
|| sentenceTokens[j].trim().startsWith("[PREP]")
|
|
363
|
+
|| sentenceTokens[j].trim().startsWith("[POSP]")) {
|
|
364
|
+
break;
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
|
|
368
|
+
sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[NEGATED]");
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
sentence = sb.toString();
|
|
375
|
+
pSpace = Pattern.compile("[\\s+]");
|
|
376
|
+
sentenceTokens = pSpace.split(sentence);
|
|
377
|
+
StringBuilder sb2 = new StringBuilder();
|
|
378
|
+
|
|
379
|
+
// Check for [POST]
|
|
380
|
+
for (int i = sentenceTokens.length - 1; i > 0; i--) {
|
|
381
|
+
sb2.insert(0, sentenceTokens[i] + " ");
|
|
382
|
+
if (sentenceTokens[i].trim().startsWith("[POST]")) {
|
|
383
|
+
for (int j = i - 1; j > 0; j--) {
|
|
384
|
+
if (sentenceTokens[j].trim().startsWith("[CONJ]")
|
|
385
|
+
|| sentenceTokens[j].trim().startsWith("[PSEU]")
|
|
386
|
+
|| sentenceTokens[j].trim().startsWith("[PREN]")
|
|
387
|
+
|| sentenceTokens[j].trim().startsWith("[PREP]")
|
|
388
|
+
|| sentenceTokens[j].trim().startsWith("[POSP]")) {
|
|
389
|
+
break;
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
|
|
393
|
+
sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[NEGATED]");
|
|
394
|
+
}
|
|
395
|
+
}
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
sentence = sb2.toString();
|
|
400
|
+
|
|
401
|
+
// If POSSIBLE negation is detected as negation.
|
|
402
|
+
// negatePossible being set to "true" then check for [PREP] and [POSP].
|
|
403
|
+
if (negPoss == true) {
|
|
404
|
+
pSpace = Pattern.compile("[\\s+]");
|
|
405
|
+
sentenceTokens = pSpace.split(sentence);
|
|
406
|
+
|
|
407
|
+
StringBuilder sb3 = new StringBuilder();
|
|
408
|
+
|
|
409
|
+
// Check for [PREP]
|
|
410
|
+
for (int i = 0; i < sentenceTokens.length; i++) {
|
|
411
|
+
sb3.append(" " + sentenceTokens[i].trim());
|
|
412
|
+
if (sentenceTokens[i].trim().startsWith("[PREP]")) {
|
|
413
|
+
|
|
414
|
+
for (int j = i + 1; j < sentenceTokens.length; j++) {
|
|
415
|
+
if (sentenceTokens[j].trim().startsWith("[CONJ]")
|
|
416
|
+
|| sentenceTokens[j].trim().startsWith("[PSEU]")
|
|
417
|
+
|| sentenceTokens[j].trim().startsWith("[POST]")
|
|
418
|
+
|| sentenceTokens[j].trim().startsWith("[PREN]")
|
|
419
|
+
|| sentenceTokens[j].trim().startsWith("[POSP]")) {
|
|
420
|
+
break;
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
|
|
424
|
+
sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[POSSIBLE]");
|
|
425
|
+
}
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
sentence = sb3.toString();
|
|
431
|
+
pSpace = Pattern.compile("[\\s+]");
|
|
432
|
+
sentenceTokens = pSpace.split(sentence);
|
|
433
|
+
StringBuilder sb4 = new StringBuilder();
|
|
434
|
+
|
|
435
|
+
// Check for [POSP]
|
|
436
|
+
for (int i = sentenceTokens.length - 1; i > 0; i--) {
|
|
437
|
+
sb4.insert(0, sentenceTokens[i] + " ");
|
|
438
|
+
if (sentenceTokens[i].trim().startsWith("[POSP]")) {
|
|
439
|
+
for (int j = i - 1; j > 0; j--) {
|
|
440
|
+
if (sentenceTokens[j].trim().startsWith("[CONJ]")
|
|
441
|
+
|| sentenceTokens[j].trim().startsWith("[PSEU]")
|
|
442
|
+
|| sentenceTokens[j].trim().startsWith("[PREN]")
|
|
443
|
+
|| sentenceTokens[j].trim().startsWith("[PREP]")
|
|
444
|
+
|| sentenceTokens[j].trim().startsWith("[POST]")) {
|
|
445
|
+
break;
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
|
|
449
|
+
sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[POSSIBLE]");
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
sentence = sb4.toString();
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
// Remove the filler character we used.
|
|
459
|
+
sentence = sentence.replaceAll(filler, " ");
|
|
460
|
+
|
|
461
|
+
// Remove the extra periods at the beginning
|
|
462
|
+
// and end of the sentence.
|
|
463
|
+
sentence = sentence.substring(0, sentence.trim().lastIndexOf('.'));
|
|
464
|
+
sentence = sentence.replaceFirst(".", "");
|
|
465
|
+
|
|
466
|
+
// Get the scope of the negation for PREN and PREP
|
|
467
|
+
if (sentence.contains("[PREN]") || sentence.contains("[PREP]")) {
|
|
468
|
+
int startOffset = sentence.indexOf("[PREN]");
|
|
469
|
+
if (startOffset == -1) {
|
|
470
|
+
startOffset = sentence.indexOf("[PREP]");
|
|
471
|
+
}
|
|
472
|
+
|
|
473
|
+
int endOffset = sentence.indexOf("[CONJ]");
|
|
474
|
+
if (endOffset == -1) {
|
|
475
|
+
endOffset = sentence.indexOf("[PSEU]");
|
|
476
|
+
}
|
|
477
|
+
if (endOffset == -1) {
|
|
478
|
+
endOffset = sentence.indexOf("[POST]");
|
|
479
|
+
}
|
|
480
|
+
if (endOffset == -1) {
|
|
481
|
+
endOffset = sentence.indexOf("[POSP]");
|
|
482
|
+
}
|
|
483
|
+
if (endOffset == -1 || endOffset < startOffset) {
|
|
484
|
+
endOffset = sentence.length() - 1;
|
|
485
|
+
}
|
|
486
|
+
sScope = sentence.substring(startOffset, endOffset + 1);
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
// Get the scope of the negation for POST and POSP
|
|
490
|
+
if (sentence.contains("[POST]") || sentence.contains("[POSP]")) {
|
|
491
|
+
int endOffset = sentence.lastIndexOf("[POST]");
|
|
492
|
+
if (endOffset == -1) {
|
|
493
|
+
endOffset = sentence.lastIndexOf("[POSP]");
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
int startOffset = sentence.lastIndexOf("[CONJ]");
|
|
497
|
+
if (startOffset == -1) {
|
|
498
|
+
startOffset = sentence.lastIndexOf("[PSEU]");
|
|
499
|
+
}
|
|
500
|
+
if (startOffset == -1) {
|
|
501
|
+
startOffset = sentence.lastIndexOf("[PREN]");
|
|
502
|
+
}
|
|
503
|
+
if (startOffset == -1) {
|
|
504
|
+
startOffset = sentence.lastIndexOf("[PREP]");
|
|
505
|
+
}
|
|
506
|
+
if (startOffset == -1) {
|
|
507
|
+
startOffset = 0;
|
|
508
|
+
}
|
|
509
|
+
sScope = sentence.substring(startOffset, endOffset);
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
// Classify to: negated/possible/affirmed
|
|
513
|
+
if (sentence.contains("[NEGATED]")) {
|
|
514
|
+
sentence = sentence + "\t" + "negated" + "\t" + sScope;
|
|
515
|
+
} else if (sentence.contains("[POSSIBLE]")) {
|
|
516
|
+
sentence = sentence + "\t" + "possible" + "\t" + sScope;
|
|
517
|
+
} else {
|
|
518
|
+
sentence = sentence + "\t" + "affirmed" + "\t" + sScope;
|
|
519
|
+
}
|
|
520
|
+
previousRunScope = sScope;
|
|
521
|
+
|
|
522
|
+
sToReturn = sentence;
|
|
523
|
+
|
|
524
|
+
return sToReturn;
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
public String getPreviousRunScope() {
|
|
528
|
+
return previousRunScope;
|
|
529
|
+
}
|
|
530
|
+
}
|