abstractor 1.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/MIT-LICENSE +20 -0
- data/README.md +14 -0
- data/Rakefile +33 -0
- data/app/assets/images/abstractor/add.png +0 -0
- data/app/assets/images/abstractor/ajax-loader.gif +0 -0
- data/app/assets/images/abstractor/bar.gif +0 -0
- data/app/assets/images/abstractor/bti_logo.jpg +0 -0
- data/app/assets/images/abstractor/bullet_arrow_down.png +0 -0
- data/app/assets/images/abstractor/bullet_arrow_up.png +0 -0
- data/app/assets/images/abstractor/cog.png +0 -0
- data/app/assets/images/abstractor/delete.png +0 -0
- data/app/assets/images/abstractor/edit.png +0 -0
- data/app/assets/images/abstractor/excel.png +0 -0
- data/app/assets/images/abstractor/favicon.ico +0 -0
- data/app/assets/images/abstractor/greencheck.gif +0 -0
- data/app/assets/images/abstractor/loading.gif +0 -0
- data/app/assets/images/abstractor/nu_logo.jpg +0 -0
- data/app/assets/images/abstractor/nubic_logo.png +0 -0
- data/app/assets/images/abstractor/page.png +0 -0
- data/app/assets/images/abstractor/rails.png +0 -0
- data/app/assets/images/abstractor/robert_h_lurie.jpg +0 -0
- data/app/assets/images/abstractor/show.png +0 -0
- data/app/assets/images/abstractor/switch_minus.gif +0 -0
- data/app/assets/images/abstractor/switch_plus.gif +0 -0
- data/app/assets/javascripts/abstractor/abstractor.js +89 -0
- data/app/assets/javascripts/abstractor/application.js +21 -0
- data/app/assets/javascripts/abstractor/combobox.js +301 -0
- data/app/assets/javascripts/abstractor/jquery/jquery.form.js +1074 -0
- data/app/assets/javascripts/abstractor/nested_attributes.js +69 -0
- data/app/assets/stylesheets/abstractor/abstractor_abstractions.css +312 -0
- data/app/assets/stylesheets/abstractor/application.css +21 -0
- data/app/assets/stylesheets/abstractor/jquery.ui.override.css.scss +73 -0
- data/app/controllers/abstractor/abstractor_abstraction_groups_controller.rb +5 -0
- data/app/controllers/abstractor/abstractor_abstractions_controller.rb +5 -0
- data/app/controllers/abstractor/abstractor_suggestions_controller.rb +5 -0
- data/app/controllers/abstractor/application_controller.rb +2 -0
- data/app/helpers/abstractor/application_helper.rb +4 -0
- data/app/models/abstractor/abstractor_abstraction.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_group.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_group_member.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_schema.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_schema_object_value.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_schema_predicate_variant.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_schema_relation.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_source.rb +6 -0
- data/app/models/abstractor/abstractor_object_type.rb +6 -0
- data/app/models/abstractor/abstractor_object_value.rb +6 -0
- data/app/models/abstractor/abstractor_object_value_variant.rb +6 -0
- data/app/models/abstractor/abstractor_relation_type.rb +6 -0
- data/app/models/abstractor/abstractor_rule_type.rb +6 -0
- data/app/models/abstractor/abstractor_subject.rb +6 -0
- data/app/models/abstractor/abstractor_subject_group.rb +6 -0
- data/app/models/abstractor/abstractor_subject_group_member.rb +6 -0
- data/app/models/abstractor/abstractor_subject_relation.rb +6 -0
- data/app/models/abstractor/abstractor_suggestion.rb +6 -0
- data/app/models/abstractor/abstractor_suggestion_object_value.rb +6 -0
- data/app/models/abstractor/abstractor_suggestion_source.rb +6 -0
- data/app/models/abstractor/abstractor_suggestion_status.rb +6 -0
- data/app/views/abstractor/abstractor_abstraction_groups/_form.html.haml +10 -0
- data/app/views/abstractor/abstractor_abstraction_groups/edit.html.haml +1 -0
- data/app/views/abstractor/abstractor_abstractions/_fields.html.haml +63 -0
- data/app/views/abstractor/abstractor_abstractions/_list.html.haml +45 -0
- data/app/views/abstractor/abstractor_abstractions/edit.html.haml +53 -0
- data/app/views/abstractor/abstractor_abstractions/show.html.haml +1 -0
- data/app/views/abstractor/shared/_error_messages.html.haml +5 -0
- data/config/cucumber.yml +8 -0
- data/config/routes.rb +7 -0
- data/db/migrate/20131227205140_create_abstractor_object_types.rb +10 -0
- data/db/migrate/20131227205219_create_abstractor_object_values.rb +10 -0
- data/db/migrate/20131227205256_create_abstractor_object_value_variants.rb +11 -0
- data/db/migrate/20131227205320_create_abstractor_relation_types.rb +10 -0
- data/db/migrate/20131227205354_create_abstractor_rule_types.rb +11 -0
- data/db/migrate/20131227205432_create_abstractor_abstraction_schemas.rb +13 -0
- data/db/migrate/20131227205456_create_abstractor_abstraction_schema_object_values.rb +10 -0
- data/db/migrate/20131227205529_create_abstractor_abstraction_schema_predicate_variants.rb +11 -0
- data/db/migrate/20131227205610_create_abstractor_abstraction_schema_relations.rb +11 -0
- data/db/migrate/20131227205652_create_abstractor_subjects.rb +12 -0
- data/db/migrate/20131227205732_create_abstractor_subject_relations.rb +11 -0
- data/db/migrate/20131227205831_create_abstractor_abstraction_sources.rb +11 -0
- data/db/migrate/20131227210211_create_abstractor_subject_groups.rb +10 -0
- data/db/migrate/20131227210244_create_abstractor_subject_group_members.rb +11 -0
- data/db/migrate/20131227210350_create_abstractor_abstraction_groups.rb +12 -0
- data/db/migrate/20131227210353_create_abstractor_abstraction_group_members.rb +11 -0
- data/db/migrate/20131227211050_create_abstractor_suggestion_statuses.rb +10 -0
- data/db/migrate/20131227211303_create_abstractor_abstractions.rb +15 -0
- data/db/migrate/20131227213427_create_abstractor_suggestions.rb +14 -0
- data/db/migrate/20131228041944_create_abstractor_suggestion_object_values.rb +11 -0
- data/db/migrate/20131228041945_create_abstractor_suggestion_sources.rb +16 -0
- data/db/seeds.rb +0 -0
- data/lib/abstractor.rb +8 -0
- data/lib/abstractor/abstractable.rb +190 -0
- data/lib/abstractor/core_ext/string.rb +99 -0
- data/lib/abstractor/engine.rb +14 -0
- data/lib/abstractor/methods/controllers/abstractor_abstraction_groups_controller.rb +37 -0
- data/lib/abstractor/methods/controllers/abstractor_abstractions_controller.rb +42 -0
- data/lib/abstractor/methods/controllers/abstractor_suggestions_controller.rb +28 -0
- data/lib/abstractor/methods/models/abstractor_abstraction.rb +65 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_group.rb +37 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_group_member.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_schema.rb +27 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_schema_object_value.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_schema_predicate_variant.rb +16 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_schema_relation.rb +18 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_source.rb +29 -0
- data/lib/abstractor/methods/models/abstractor_object_type.rb +16 -0
- data/lib/abstractor/methods/models/abstractor_object_value.rb +24 -0
- data/lib/abstractor/methods/models/abstractor_object_value_variant.rb +16 -0
- data/lib/abstractor/methods/models/abstractor_relation_type.rb +16 -0
- data/lib/abstractor/methods/models/abstractor_rule_type.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_subject.rb +258 -0
- data/lib/abstractor/methods/models/abstractor_subject_group.rb +19 -0
- data/lib/abstractor/methods/models/abstractor_subject_group_member.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_subject_relation.rb +18 -0
- data/lib/abstractor/methods/models/abstractor_suggestion.rb +88 -0
- data/lib/abstractor/methods/models/abstractor_suggestion_object_value.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_suggestion_source.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_suggestion_status.rb +29 -0
- data/lib/abstractor/methods/models/soft_delete.rb +35 -0
- data/lib/abstractor/negation_detection.rb +43 -0
- data/lib/abstractor/parser.rb +76 -0
- data/lib/abstractor/setup.rb +24 -0
- data/lib/abstractor/user_interface.rb +40 -0
- data/lib/abstractor/utility.rb +8 -0
- data/lib/abstractor/version.rb +3 -0
- data/lib/generators/abstractor/install/install_generator.rb +118 -0
- data/lib/generators/abstractor/install/templates/controllers/abstractor_abstraction_groups_controller.rb +16 -0
- data/lib/generators/abstractor/install/templates/controllers/abstractor_abstractions_controller.rb +24 -0
- data/lib/generators/abstractor/install/templates/controllers/abstractor_suggestions_controller.rb +12 -0
- data/lib/generators/abstractor/install/templates/dictionaries.yml +235 -0
- data/lib/generators/abstractor/install/templates/helpers/abstractions_helper.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group_member.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_object_value.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_predicate_variant.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_relation.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_source.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_object_type.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_object_value.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_object_value_variant.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_relation_type.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_rule_type.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_subject.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_subject_group.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_subject_group_member.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_subject_relation.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_suggestion.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_object_value.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_source.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_status.rb +9 -0
- data/lib/lingscope/build.xml +74 -0
- data/lib/lingscope/build/built-jar.properties +4 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/AbnerTokenizer.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/AnnotationComparer.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/Annotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/BaselineAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/BaselineCueAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/BaselineScopeAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/CrfAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/NegexAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/NegexCueAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/NegexScopeAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/PosTagger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/negex/GenNegEx.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/negex/Sorter.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/AnnotatedFilesMerger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/AnnotationComparerDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/BaselineDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/CrfDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/CueAndPosFilesMerger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/ModelTrainer.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/NegexDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/PosTaggerDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/SentencePosTagger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/SentenceTagger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/io/AnnotatedSentencesIO.class +0 -0
- data/lib/lingscope/build/classes/lingscope/structures/AnnotatedSentence.class +0 -0
- data/lib/lingscope/dist/README.TXT +32 -0
- data/lib/lingscope/dist/javadoc/allclasses-frame.html +80 -0
- data/lib/lingscope/dist/javadoc/allclasses-noframe.html +80 -0
- data/lib/lingscope/dist/javadoc/constant-values.html +199 -0
- data/lib/lingscope/dist/javadoc/deprecated-list.html +147 -0
- data/lib/lingscope/dist/javadoc/help-doc.html +224 -0
- data/lib/lingscope/dist/javadoc/index-files/index-1.html +188 -0
- data/lib/lingscope/dist/javadoc/index-files/index-10.html +149 -0
- data/lib/lingscope/dist/javadoc/index-files/index-11.html +158 -0
- data/lib/lingscope/dist/javadoc/index-files/index-12.html +157 -0
- data/lib/lingscope/dist/javadoc/index-files/index-13.html +177 -0
- data/lib/lingscope/dist/javadoc/index-files/index-14.html +155 -0
- data/lib/lingscope/dist/javadoc/index-files/index-15.html +152 -0
- data/lib/lingscope/dist/javadoc/index-files/index-16.html +146 -0
- data/lib/lingscope/dist/javadoc/index-files/index-2.html +158 -0
- data/lib/lingscope/dist/javadoc/index-files/index-3.html +165 -0
- data/lib/lingscope/dist/javadoc/index-files/index-4.html +146 -0
- data/lib/lingscope/dist/javadoc/index-files/index-5.html +219 -0
- data/lib/lingscope/dist/javadoc/index-files/index-6.html +149 -0
- data/lib/lingscope/dist/javadoc/index-files/index-7.html +155 -0
- data/lib/lingscope/dist/javadoc/index-files/index-8.html +185 -0
- data/lib/lingscope/dist/javadoc/index-files/index-9.html +164 -0
- data/lib/lingscope/dist/javadoc/index.html +74 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/AbnerTokenizer.html +280 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/AnnotationComparer.html +526 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/Annotator.html +401 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineAnnotator.html +375 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineCueAnnotator.html +309 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineScopeAnnotator.html +340 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/CrfAnnotator.html +340 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexAnnotator.html +364 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexCueAnnotator.html +309 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexScopeAnnotator.html +309 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/PosTagger.html +268 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AbnerTokenizer.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AnnotationComparer.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/Annotator.html +299 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineAnnotator.html +189 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineCueAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineScopeAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/CrfAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexAnnotator.html +189 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexCueAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexScopeAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/PosTagger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/GenNegEx.html +369 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/Sorter.html +253 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/GenNegEx.html +181 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/Sorter.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-frame.html +35 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-summary.html +165 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-tree.html +154 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-use.html +174 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-frame.html +53 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-summary.html +198 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-tree.html +160 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-use.html +202 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotatedFilesMerger.html +284 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotationComparerDriver.html +258 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/BaselineDriver.html +260 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/CrfDriver.html +262 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/CueAndPosFilesMerger.html +310 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/ModelTrainer.html +281 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/NegexDriver.html +262 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/PosTaggerDriver.html +289 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/SentencePosTagger.html +313 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/SentenceTagger.html +512 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotatedFilesMerger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotationComparerDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/BaselineDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CrfDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CueAndPosFilesMerger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/ModelTrainer.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/NegexDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/PosTaggerDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentencePosTagger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentenceTagger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/package-frame.html +51 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/package-summary.html +195 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/package-tree.html +154 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/package-use.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/AnnotatedSentencesIO.html +285 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/class-use/AnnotatedSentencesIO.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/package-frame.html +33 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/package-summary.html +158 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/package-tree.html +154 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/package-use.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/AnnotatedSentence.html +425 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/class-use/AnnotatedSentence.html +423 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/package-frame.html +33 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/package-summary.html +158 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/package-tree.html +154 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/package-use.html +209 -0
- data/lib/lingscope/dist/javadoc/overview-frame.html +51 -0
- data/lib/lingscope/dist/javadoc/overview-summary.html +168 -0
- data/lib/lingscope/dist/javadoc/overview-tree.html +159 -0
- data/lib/lingscope/dist/javadoc/package-list +5 -0
- data/lib/lingscope/dist/javadoc/resources/inherit.gif +0 -0
- data/lib/lingscope/dist/javadoc/stylesheet.css +29 -0
- data/lib/lingscope/dist/lib/abner.jar +0 -0
- data/lib/lingscope/dist/lib/commons-lang-2.4.jar +0 -0
- data/lib/lingscope/dist/lib/java_utils.jar +0 -0
- data/lib/lingscope/dist/lib/sptoolkit.jar +0 -0
- data/lib/lingscope/dist/lib/stanford-postagger.jar +0 -0
- data/lib/lingscope/dist/lingscope.jar +0 -0
- data/lib/lingscope/lingscope.zip +0 -0
- data/lib/lingscope/manifest.mf +3 -0
- data/lib/lingscope/nbproject/build-impl.xml +1338 -0
- data/lib/lingscope/nbproject/genfiles.properties +8 -0
- data/lib/lingscope/nbproject/private/config.properties +0 -0
- data/lib/lingscope/nbproject/private/private.properties +10 -0
- data/lib/lingscope/nbproject/private/private.xml +5 -0
- data/lib/lingscope/nbproject/project.properties +85 -0
- data/lib/lingscope/nbproject/project.xml +15 -0
- data/lib/lingscope/negation_models.zip +0 -0
- data/lib/lingscope/negation_models/baseline_cue_all_both.model +49 -0
- data/lib/lingscope/negation_models/baseline_cue_all_clinical.model +19 -0
- data/lib/lingscope/negation_models/crf_cue_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_cue_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_cue_crf_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_cue_crf_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_cue_regex_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_cue_regex_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_crf_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_crf_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_regex_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_regex_all_clinical.model +0 -0
- data/lib/lingscope/src/lingscope/algorithms/AbnerTokenizer.java +58 -0
- data/lib/lingscope/src/lingscope/algorithms/AnnotationComparer.java +207 -0
- data/lib/lingscope/src/lingscope/algorithms/Annotator.java +51 -0
- data/lib/lingscope/src/lingscope/algorithms/BaselineAnnotator.java +80 -0
- data/lib/lingscope/src/lingscope/algorithms/BaselineCueAnnotator.java +84 -0
- data/lib/lingscope/src/lingscope/algorithms/BaselineScopeAnnotator.java +101 -0
- data/lib/lingscope/src/lingscope/algorithms/CrfAnnotator.java +45 -0
- data/lib/lingscope/src/lingscope/algorithms/NegexAnnotator.java +52 -0
- data/lib/lingscope/src/lingscope/algorithms/NegexCueAnnotator.java +26 -0
- data/lib/lingscope/src/lingscope/algorithms/NegexScopeAnnotator.java +26 -0
- data/lib/lingscope/src/lingscope/algorithms/PosTagger.java +54 -0
- data/lib/lingscope/src/lingscope/algorithms/negex/GenNegEx.java +530 -0
- data/lib/lingscope/src/lingscope/algorithms/negex/Sorter.java +36 -0
- data/lib/lingscope/src/lingscope/drivers/AnnotatedFilesMerger.java +61 -0
- data/lib/lingscope/src/lingscope/drivers/AnnotationComparerDriver.java +22 -0
- data/lib/lingscope/src/lingscope/drivers/BaselineDriver.java +45 -0
- data/lib/lingscope/src/lingscope/drivers/CrfDriver.java +31 -0
- data/lib/lingscope/src/lingscope/drivers/CueAndPosFilesMerger.java +86 -0
- data/lib/lingscope/src/lingscope/drivers/ModelTrainer.java +39 -0
- data/lib/lingscope/src/lingscope/drivers/NegexDriver.java +32 -0
- data/lib/lingscope/src/lingscope/drivers/PosTaggerDriver.java +62 -0
- data/lib/lingscope/src/lingscope/drivers/SentencePosTagger.java +89 -0
- data/lib/lingscope/src/lingscope/drivers/SentenceTagger.java +158 -0
- data/lib/lingscope/src/lingscope/io/AnnotatedSentencesIO.java +53 -0
- data/lib/lingscope/src/lingscope/structures/AnnotatedSentence.java +105 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.csv +1 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.txt +2273 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.csv +1 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.txt +1342 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/icd-o3 readme.txt +21 -0
- data/lib/setup/data/custom_site_synonyms.csv +1 -0
- data/lib/setup/data/diagnosis_categorizations.csv +1 -0
- data/lib/setup/data/high_level_primary_cns_diagnosis_categorizations.yml +597 -0
- data/lib/setup/data/icdo3_sites_with_laterality.txt +11 -0
- data/lib/setup/data/site_site_categories.txt +28 -0
- data/lib/tasks/abstractor_tasks.rake +22 -0
- data/lib/tasks/cucumber.rake +65 -0
- metadata +754 -0
@@ -0,0 +1,26 @@
|
|
1
|
+
package lingscope.algorithms;
|
2
|
+
|
3
|
+
import lingscope.structures.AnnotatedSentence;
|
4
|
+
|
5
|
+
/**
|
6
|
+
*
|
7
|
+
* @author shashank
|
8
|
+
*/
|
9
|
+
public class NegexScopeAnnotator extends NegexAnnotator {
|
10
|
+
|
11
|
+
public NegexScopeAnnotator(String beginTag, String interTag, String otherTag) {
|
12
|
+
super(beginTag, interTag, otherTag);
|
13
|
+
}
|
14
|
+
|
15
|
+
@Override
|
16
|
+
public AnnotatedSentence annotateSentence(String sentence, boolean isTokenized) {
|
17
|
+
if (negex == null) {
|
18
|
+
throw new RuntimeException("Annotator has not been loaded");
|
19
|
+
}
|
20
|
+
if (!isTokenized) {
|
21
|
+
sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
|
22
|
+
}
|
23
|
+
String raw = negex.getScope(sentence, rules, beginTag, interTag, otherTag);
|
24
|
+
return new AnnotatedSentence(raw);
|
25
|
+
}
|
26
|
+
}
|
@@ -0,0 +1,54 @@
|
|
1
|
+
package lingscope.algorithms;
|
2
|
+
|
3
|
+
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
|
4
|
+
import java.util.ArrayList;
|
5
|
+
import java.util.List;
|
6
|
+
import java.util.logging.Level;
|
7
|
+
import java.util.logging.Logger;
|
8
|
+
|
9
|
+
/**
|
10
|
+
* Part of speech tagger
|
11
|
+
* @author shashank
|
12
|
+
*/
|
13
|
+
public class PosTagger {
|
14
|
+
|
15
|
+
private MaxentTagger posTagger;
|
16
|
+
|
17
|
+
/**
|
18
|
+
* Creates an instance of POS tagger by loading the given grammar file
|
19
|
+
* @param grammarFile
|
20
|
+
*/
|
21
|
+
public PosTagger(String grammarFile) {
|
22
|
+
try {
|
23
|
+
posTagger = new MaxentTagger(grammarFile);
|
24
|
+
} catch (Exception ex) {
|
25
|
+
Logger.getLogger(PosTagger.class.getName()).log(Level.SEVERE, null, ex);
|
26
|
+
}
|
27
|
+
}
|
28
|
+
|
29
|
+
/**
|
30
|
+
* Takes a sentence as input and returns list of POS tags associated with
|
31
|
+
* each word in the sentence
|
32
|
+
* @param sentence
|
33
|
+
* @param isTokenized
|
34
|
+
* @return
|
35
|
+
*/
|
36
|
+
public List<String> replaceWordsWithPos(String sentence, boolean isTokenized) {
|
37
|
+
if (!isTokenized) {
|
38
|
+
sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
|
39
|
+
}
|
40
|
+
List<String> ret = new ArrayList<String>();
|
41
|
+
String tagged = "";
|
42
|
+
try {
|
43
|
+
tagged = posTagger.tagString(sentence);
|
44
|
+
} catch (Exception ex) {
|
45
|
+
Logger.getLogger(PosTagger.class.getName()).log(Level.SEVERE, null, ex);
|
46
|
+
}
|
47
|
+
for (String wordTag : tagged.split(" +")) {
|
48
|
+
String[] tags = wordTag.split("/");
|
49
|
+
String tag = tags[tags.length - 1];
|
50
|
+
ret.add(tag);
|
51
|
+
}
|
52
|
+
return ret;
|
53
|
+
}
|
54
|
+
}
|
@@ -0,0 +1,530 @@
|
|
1
|
+
package lingscope.algorithms.negex;
|
2
|
+
|
3
|
+
import java.util.regex.Matcher;
|
4
|
+
import java.util.regex.Pattern;
|
5
|
+
import java.util.*;
|
6
|
+
|
7
|
+
/***************************************************************************************
|
8
|
+
* Author: Imre Solti
|
9
|
+
* Date: 09/15/2008
|
10
|
+
* Modified: 04/15/2009
|
11
|
+
* Changed to specifications of test kit and discussions with WC and PH.
|
12
|
+
* Modified: 04/26/2009
|
13
|
+
* Fixed the deletion of the last character in scope for PREN, PREP negation scopes.
|
14
|
+
*
|
15
|
+
* Wendy Chapman's NegEx algorithm in Java.
|
16
|
+
*
|
17
|
+
* Sentence boundaries serve as WINDOW for negation (suggested by Wendy Chapman)
|
18
|
+
*
|
19
|
+
****************************************************************************************/
|
20
|
+
|
21
|
+
/*
|
22
|
+
Copyright 2008 Imre Solti
|
23
|
+
|
24
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
25
|
+
|
26
|
+
you may not use this file except in compliance with the License. You may obtain a copy of the License at
|
27
|
+
|
28
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
29
|
+
|
30
|
+
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
|
31
|
+
WITHOUT
|
32
|
+
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and
|
33
|
+
limitations under the License.
|
34
|
+
*/
|
35
|
+
public class GenNegEx {
|
36
|
+
|
37
|
+
private String previousRunScope;
|
38
|
+
private String[] sentenceWords;
|
39
|
+
private String[] tags;
|
40
|
+
|
41
|
+
public String getCue(String sentenceString, List<String> ruleStrings, String beginTag, String interTag, String otherTag) {
|
42
|
+
tagNegation(sentenceString, ruleStrings);
|
43
|
+
StringBuilder ret = new StringBuilder();
|
44
|
+
boolean inScope = false;
|
45
|
+
for (int i = 0; i < sentenceWords.length; ++i) {
|
46
|
+
String tag = tags[i];
|
47
|
+
String sentenceWord = sentenceWords[i];
|
48
|
+
ret.append(" ").append(sentenceWord).append("|");
|
49
|
+
if (tag.equalsIgnoreCase("cue")) {
|
50
|
+
if (inScope) {
|
51
|
+
ret.append(interTag);
|
52
|
+
} else {
|
53
|
+
ret.append(beginTag);
|
54
|
+
inScope = true;
|
55
|
+
}
|
56
|
+
} else {
|
57
|
+
ret.append(otherTag);
|
58
|
+
inScope = false;
|
59
|
+
}
|
60
|
+
}
|
61
|
+
return ret.substring(1);
|
62
|
+
}
|
63
|
+
|
64
|
+
public String getScope(String sentenceString, List<String> ruleStrings, String beginTag, String interTag, String otherTag) {
|
65
|
+
tagNegation(sentenceString, ruleStrings);
|
66
|
+
StringBuilder ret = new StringBuilder();
|
67
|
+
boolean inScope = false;
|
68
|
+
Pattern punct = Pattern.compile("\\p{Punct}");
|
69
|
+
for (int i = 0; i < sentenceWords.length; ++i) {
|
70
|
+
String tag = tags[i];
|
71
|
+
String sentenceWord = sentenceWords[i];
|
72
|
+
ret.append(" ").append(sentenceWord).append("|");
|
73
|
+
Matcher m = punct.matcher(sentenceWord);
|
74
|
+
if (m.matches()) {
|
75
|
+
if (i + 1 == sentenceWords.length) {
|
76
|
+
ret.append(otherTag);
|
77
|
+
continue;
|
78
|
+
} else if (tags[i + 1].equalsIgnoreCase("other")) {
|
79
|
+
ret.append(otherTag);
|
80
|
+
continue;
|
81
|
+
}
|
82
|
+
}
|
83
|
+
if (tag.equalsIgnoreCase("cue") || tag.equalsIgnoreCase("scope")) {
|
84
|
+
if (inScope) {
|
85
|
+
ret.append(interTag);
|
86
|
+
} else {
|
87
|
+
ret.append(beginTag);
|
88
|
+
inScope = true;
|
89
|
+
}
|
90
|
+
} else {
|
91
|
+
ret.append(otherTag);
|
92
|
+
inScope = false;
|
93
|
+
}
|
94
|
+
}
|
95
|
+
return ret.substring(1);
|
96
|
+
}
|
97
|
+
|
98
|
+
public void tagNegation(String sentenceString, List<String> ruleStrings) {
|
99
|
+
Sorter s = new Sorter();
|
100
|
+
sentenceWords = sentenceString.split("\\s+");
|
101
|
+
tags = new String[sentenceWords.length];
|
102
|
+
for (int i = 0; i < sentenceWords.length; ++i) {
|
103
|
+
tags[i] = "other";
|
104
|
+
}
|
105
|
+
String sToReturn = "";
|
106
|
+
String sScope = "";
|
107
|
+
List<String> sortedRules = new ArrayList<String>();
|
108
|
+
|
109
|
+
String filler = "__";
|
110
|
+
boolean negPoss = false;
|
111
|
+
|
112
|
+
// Sort the rules by length in descending order.
|
113
|
+
// Rules need to be sorted so the longest rule is always tried to match
|
114
|
+
// first.
|
115
|
+
// Some of the rules overlap so without sorting first shorter rules (some of them POSSIBLE or PSEUDO)
|
116
|
+
// would match before longer legitimate negation rules.
|
117
|
+
//
|
118
|
+
|
119
|
+
// There is efficiency issue here. It is better if rules are sorted by the
|
120
|
+
// calling program once and used without sorting in GennegEx.
|
121
|
+
sortedRules = s.sortRules(ruleStrings);
|
122
|
+
|
123
|
+
// Process the sentence and tag each matched negation
|
124
|
+
// rule with correct negation rule tag.
|
125
|
+
//
|
126
|
+
// At the same time check for the phrase that we want to decide
|
127
|
+
// the negation status for and
|
128
|
+
// tag the phrase with [PHRASE] ... [PHRASE]
|
129
|
+
// In both the negation rules and in the phrase replace white space
|
130
|
+
// with "filler" string. (This could cause problems if the sentences
|
131
|
+
// we study has "filler" on their own.)
|
132
|
+
|
133
|
+
// Sentence needs one character in the beginning and end to match.
|
134
|
+
// We remove the extra characters after processing.
|
135
|
+
String sentence = "." + sentenceString + ".";
|
136
|
+
|
137
|
+
// Tag the phrases we want to detect for negation.
|
138
|
+
// Should happen before rule detection.
|
139
|
+
|
140
|
+
Iterator<String> iRule = sortedRules.iterator();
|
141
|
+
while (iRule.hasNext()) {
|
142
|
+
String rule = iRule.next();
|
143
|
+
Pattern p = Pattern.compile("[\\t]+"); // Working.
|
144
|
+
String[] ruleTokens = p.split(rule.trim());
|
145
|
+
String[] ruleMembers = ruleTokens[0].trim().split(" ");
|
146
|
+
|
147
|
+
|
148
|
+
// Add the regular expression characters to tokens and asemble the rule again.
|
149
|
+
String rule2 = "";
|
150
|
+
for (int i = 0; i < ruleMembers.length; i++) {
|
151
|
+
if (!ruleMembers[i].equals("")) {
|
152
|
+
if (ruleMembers.length == 1) {
|
153
|
+
rule2 = ruleMembers[i];
|
154
|
+
} else {
|
155
|
+
rule2 = rule2 + ruleMembers[i].trim() + "\\s+";
|
156
|
+
}
|
157
|
+
}
|
158
|
+
}
|
159
|
+
// Remove the last s+
|
160
|
+
if (rule2.endsWith("\\s+")) {
|
161
|
+
rule2 = rule2.substring(0, rule2.lastIndexOf("\\s+"));
|
162
|
+
}
|
163
|
+
|
164
|
+
rule2 = "(?m)(?i)[[\\p{Punct}&&[^-_\\]\\[]]|\\s+](" + rule2 + ")[[\\p{Punct}&&[^_]]|\\s+]";
|
165
|
+
// rule2 = "(?m)(?i)[[\\p{Punct}&&[^\\]\\[]]|\\s+](" + rule2 + ")[[\\p{Punct}&&[^_]]|\\s+]";
|
166
|
+
|
167
|
+
Pattern p2 = Pattern.compile(rule2.trim());
|
168
|
+
Matcher m = p2.matcher(sentence);
|
169
|
+
Matcher m2 = p2.matcher(sentence);
|
170
|
+
if (m2.find()) {
|
171
|
+
/**
|
172
|
+
* Only PREN and POST rules matter
|
173
|
+
* added by Shashank
|
174
|
+
*/
|
175
|
+
if (ruleTokens[1].equalsIgnoreCase("[PREN]") || ruleTokens[1].equalsIgnoreCase("[POST]")) {
|
176
|
+
for (int i = 0; i < sentenceWords.length; ++i) {
|
177
|
+
boolean perfectMatch = true;
|
178
|
+
for (int j = 0; j < ruleMembers.length; ++j) {
|
179
|
+
if (!sentenceWords[i + j].equalsIgnoreCase(ruleMembers[j])) {
|
180
|
+
perfectMatch = false;
|
181
|
+
break;
|
182
|
+
}
|
183
|
+
}
|
184
|
+
if (perfectMatch) {
|
185
|
+
for (int j = 0; j < ruleMembers.length; ++j) {
|
186
|
+
tags[i + j] = "cue";
|
187
|
+
}
|
188
|
+
}
|
189
|
+
}
|
190
|
+
}
|
191
|
+
}
|
192
|
+
|
193
|
+
while (m.find() == true) {
|
194
|
+
sentence = m.replaceAll(" " + ruleTokens[1].trim() + m.group().trim().replaceAll(" ", filler) + ruleTokens[1].trim() + " ");
|
195
|
+
}
|
196
|
+
}
|
197
|
+
|
198
|
+
|
199
|
+
// Exchange the [PHRASE] ... [PHRASE] tags for [NEGATED] ... [NEGATED]
|
200
|
+
// based of PREN, POST rules and if flag is set to true
|
201
|
+
// then based on PREP and POSP, as well.
|
202
|
+
|
203
|
+
// Because PRENEGATION [PREN} is checked first it takes precedent over
|
204
|
+
// POSTNEGATION [POST].
|
205
|
+
// Similarly POSTNEGATION [POST] takes precedent over POSSIBLE PRENEGATION [PREP]
|
206
|
+
// and [PREP] takes precedent over POSSIBLE POSTNEGATION [POSP].
|
207
|
+
|
208
|
+
String splitPattern = "\\s+|[^\\s]__";
|
209
|
+
Pattern pSpace = Pattern.compile(splitPattern);
|
210
|
+
String[] sentenceTokens = pSpace.split(sentence.trim());
|
211
|
+
StringBuilder sb = new StringBuilder();
|
212
|
+
|
213
|
+
|
214
|
+
// Check for [PREN]
|
215
|
+
for (int i = 0; i < sentenceTokens.length; i++) {
|
216
|
+
sb.append(" ").append(sentenceTokens[i].trim());
|
217
|
+
if (sentenceTokens[i].trim().startsWith("[PREN]")) {
|
218
|
+
|
219
|
+
for (int j = i + 1; j < sentenceTokens.length; j++) {
|
220
|
+
if (sentenceTokens[j].trim().startsWith("[CONJ]")
|
221
|
+
|| sentenceTokens[j].trim().startsWith("[PSEU]")
|
222
|
+
|| sentenceTokens[j].trim().startsWith("[POST]")
|
223
|
+
|| sentenceTokens[j].trim().startsWith("[PREP]")
|
224
|
+
|| sentenceTokens[j].trim().startsWith("[POSP]")) {
|
225
|
+
break;
|
226
|
+
}
|
227
|
+
if (!tags[j].equalsIgnoreCase("cue")) {
|
228
|
+
tags[j] = "scope";
|
229
|
+
}
|
230
|
+
}
|
231
|
+
}
|
232
|
+
}
|
233
|
+
|
234
|
+
sentence = sb.toString().trim();
|
235
|
+
pSpace = Pattern.compile(splitPattern);
|
236
|
+
sentenceTokens = pSpace.split(sentence);
|
237
|
+
StringBuilder sb2 = new StringBuilder();
|
238
|
+
|
239
|
+
// Check for [POST]
|
240
|
+
for (int i = sentenceTokens.length - 1; i > 0; i--) {
|
241
|
+
sb2.insert(0, sentenceTokens[i] + " ");
|
242
|
+
if (sentenceTokens[i].trim().startsWith("[POST]")) {
|
243
|
+
for (int j = i - 1; j > 0; j--) {
|
244
|
+
if (sentenceTokens[j].trim().startsWith("[CONJ]")
|
245
|
+
|| sentenceTokens[j].trim().startsWith("[PSEU]")
|
246
|
+
|| sentenceTokens[j].trim().startsWith("[PREN]")
|
247
|
+
|| sentenceTokens[j].trim().startsWith("[PREP]")
|
248
|
+
|| sentenceTokens[j].trim().startsWith("[POSP]")) {
|
249
|
+
break;
|
250
|
+
}
|
251
|
+
if (!tags[j].equalsIgnoreCase("cue")) {
|
252
|
+
tags[j] = "scope";
|
253
|
+
}
|
254
|
+
}
|
255
|
+
}
|
256
|
+
}
|
257
|
+
}
|
258
|
+
|
259
|
+
public String negCheck(String sentenceString, String phraseString, ArrayList ruleStrings,
|
260
|
+
boolean negatePossible) throws Exception {
|
261
|
+
|
262
|
+
Sorter s = new Sorter();
|
263
|
+
String sToReturn = "";
|
264
|
+
String sScope = "";
|
265
|
+
String sentencePortion = "";
|
266
|
+
|
267
|
+
String filler = "_";
|
268
|
+
boolean negPoss = negatePossible;
|
269
|
+
boolean negationScope = true;
|
270
|
+
|
271
|
+
// Sort the rules by length in descending order.
|
272
|
+
// Rules need to be sorted so the longest rule is always tried to match
|
273
|
+
// first.
|
274
|
+
// Some of the rules overlap so without sorting first shorter rules (some of them POSSIBLE or PSEUDO)
|
275
|
+
// would match before longer legitimate negation rules.
|
276
|
+
//
|
277
|
+
|
278
|
+
// There is efficiency issue here. It is better if rules are sorted by the
|
279
|
+
// calling program once and used without sorting in GennegEx.
|
280
|
+
List<String> sortedRules = s.sortRules(ruleStrings);
|
281
|
+
|
282
|
+
// Process the sentence and tag each matched negation
|
283
|
+
// rule with correct negation rule tag.
|
284
|
+
//
|
285
|
+
// At the same time check for the phrase that we want to decide
|
286
|
+
// the negation status for and
|
287
|
+
// tag the phrase with [PHRASE] ... [PHRASE]
|
288
|
+
// In both the negation rules and in the phrase replace white space
|
289
|
+
// with "filler" string. (This could cause problems if the sentences
|
290
|
+
// we study has "filler" on their own.)
|
291
|
+
|
292
|
+
// Sentence needs one character in the beginning and end to match.
|
293
|
+
// We remove the extra characters after processing.
|
294
|
+
String sentence = "." + sentenceString + ".";
|
295
|
+
|
296
|
+
// Tag the phrases we want to detect for negation.
|
297
|
+
// Should happen before rule detection.
|
298
|
+
String phrase = phraseString;
|
299
|
+
Pattern pph = Pattern.compile(phrase.trim(), Pattern.CASE_INSENSITIVE);
|
300
|
+
Matcher mph = pph.matcher(sentence);
|
301
|
+
|
302
|
+
while (mph.find() == true) {
|
303
|
+
sentence = mph.replaceAll(" [PHRASE]" + mph.group().trim().replaceAll(" ", filler) + "[PHRASE]");
|
304
|
+
}
|
305
|
+
|
306
|
+
Iterator iRule = sortedRules.iterator();
|
307
|
+
while (iRule.hasNext()) {
|
308
|
+
String rule = (String) iRule.next();
|
309
|
+
Pattern p = Pattern.compile("[\\t]+"); // Working.
|
310
|
+
String[] ruleTokens = p.split(rule.trim());
|
311
|
+
// Add the regular expression characters to tokens and asemble the rule again.
|
312
|
+
String[] ruleMembers = ruleTokens[0].trim().split(" ");
|
313
|
+
String rule2 = "";
|
314
|
+
for (int i = 0; i <= ruleMembers.length - 1; i++) {
|
315
|
+
if (!ruleMembers[i].equals("")) {
|
316
|
+
if (ruleMembers.length == 1) {
|
317
|
+
rule2 = ruleMembers[i];
|
318
|
+
} else {
|
319
|
+
rule2 = rule2 + ruleMembers[i].trim() + "\\s+";
|
320
|
+
}
|
321
|
+
}
|
322
|
+
}
|
323
|
+
// Remove the last s+
|
324
|
+
if (rule2.endsWith("\\s+")) {
|
325
|
+
rule2 = rule2.substring(0, rule2.lastIndexOf("\\s+"));
|
326
|
+
}
|
327
|
+
|
328
|
+
rule2 = "(?m)(?i)[[\\p{Punct}&&[^\\]\\[]]|\\s+](" + rule2 + ")[[\\p{Punct}&&[^_]]|\\s+]";
|
329
|
+
|
330
|
+
Pattern p2 = Pattern.compile(rule2.trim());
|
331
|
+
Matcher m = p2.matcher(sentence);
|
332
|
+
|
333
|
+
while (m.find() == true) {
|
334
|
+
sentence = m.replaceAll(" " + ruleTokens[1].trim() + m.group().trim().replaceAll(" ", filler) + ruleTokens[1].trim() + " ");
|
335
|
+
}
|
336
|
+
}
|
337
|
+
|
338
|
+
|
339
|
+
// Exchange the [PHRASE] ... [PHRASE] tags for [NEGATED] ... [NEGATED]
|
340
|
+
// based of PREN, POST rules and if flag is set to true
|
341
|
+
// then based on PREP and POSP, as well.
|
342
|
+
|
343
|
+
// Because PRENEGATION [PREN} is checked first it takes precedent over
|
344
|
+
// POSTNEGATION [POST].
|
345
|
+
// Similarly POSTNEGATION [POST] takes precedent over POSSIBLE PRENEGATION [PREP]
|
346
|
+
// and [PREP] takes precedent over POSSIBLE POSTNEGATION [POSP].
|
347
|
+
|
348
|
+
Pattern pSpace = Pattern.compile("[\\s+]");
|
349
|
+
String[] sentenceTokens = pSpace.split(sentence);
|
350
|
+
StringBuilder sb = new StringBuilder();
|
351
|
+
|
352
|
+
|
353
|
+
// Check for [PREN]
|
354
|
+
for (int i = 0; i < sentenceTokens.length; i++) {
|
355
|
+
sb.append(" " + sentenceTokens[i].trim());
|
356
|
+
if (sentenceTokens[i].trim().startsWith("[PREN]")) {
|
357
|
+
|
358
|
+
for (int j = i + 1; j < sentenceTokens.length; j++) {
|
359
|
+
if (sentenceTokens[j].trim().startsWith("[CONJ]")
|
360
|
+
|| sentenceTokens[j].trim().startsWith("[PSEU]")
|
361
|
+
|| sentenceTokens[j].trim().startsWith("[POST]")
|
362
|
+
|| sentenceTokens[j].trim().startsWith("[PREP]")
|
363
|
+
|| sentenceTokens[j].trim().startsWith("[POSP]")) {
|
364
|
+
break;
|
365
|
+
}
|
366
|
+
|
367
|
+
if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
|
368
|
+
sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[NEGATED]");
|
369
|
+
}
|
370
|
+
}
|
371
|
+
}
|
372
|
+
}
|
373
|
+
|
374
|
+
sentence = sb.toString();
|
375
|
+
pSpace = Pattern.compile("[\\s+]");
|
376
|
+
sentenceTokens = pSpace.split(sentence);
|
377
|
+
StringBuilder sb2 = new StringBuilder();
|
378
|
+
|
379
|
+
// Check for [POST]
|
380
|
+
for (int i = sentenceTokens.length - 1; i > 0; i--) {
|
381
|
+
sb2.insert(0, sentenceTokens[i] + " ");
|
382
|
+
if (sentenceTokens[i].trim().startsWith("[POST]")) {
|
383
|
+
for (int j = i - 1; j > 0; j--) {
|
384
|
+
if (sentenceTokens[j].trim().startsWith("[CONJ]")
|
385
|
+
|| sentenceTokens[j].trim().startsWith("[PSEU]")
|
386
|
+
|| sentenceTokens[j].trim().startsWith("[PREN]")
|
387
|
+
|| sentenceTokens[j].trim().startsWith("[PREP]")
|
388
|
+
|| sentenceTokens[j].trim().startsWith("[POSP]")) {
|
389
|
+
break;
|
390
|
+
}
|
391
|
+
|
392
|
+
if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
|
393
|
+
sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[NEGATED]");
|
394
|
+
}
|
395
|
+
}
|
396
|
+
}
|
397
|
+
}
|
398
|
+
|
399
|
+
sentence = sb2.toString();
|
400
|
+
|
401
|
+
// If POSSIBLE negation is detected as negation.
|
402
|
+
// negatePossible being set to "true" then check for [PREP] and [POSP].
|
403
|
+
if (negPoss == true) {
|
404
|
+
pSpace = Pattern.compile("[\\s+]");
|
405
|
+
sentenceTokens = pSpace.split(sentence);
|
406
|
+
|
407
|
+
StringBuilder sb3 = new StringBuilder();
|
408
|
+
|
409
|
+
// Check for [PREP]
|
410
|
+
for (int i = 0; i < sentenceTokens.length; i++) {
|
411
|
+
sb3.append(" " + sentenceTokens[i].trim());
|
412
|
+
if (sentenceTokens[i].trim().startsWith("[PREP]")) {
|
413
|
+
|
414
|
+
for (int j = i + 1; j < sentenceTokens.length; j++) {
|
415
|
+
if (sentenceTokens[j].trim().startsWith("[CONJ]")
|
416
|
+
|| sentenceTokens[j].trim().startsWith("[PSEU]")
|
417
|
+
|| sentenceTokens[j].trim().startsWith("[POST]")
|
418
|
+
|| sentenceTokens[j].trim().startsWith("[PREN]")
|
419
|
+
|| sentenceTokens[j].trim().startsWith("[POSP]")) {
|
420
|
+
break;
|
421
|
+
}
|
422
|
+
|
423
|
+
if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
|
424
|
+
sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[POSSIBLE]");
|
425
|
+
}
|
426
|
+
}
|
427
|
+
}
|
428
|
+
}
|
429
|
+
|
430
|
+
sentence = sb3.toString();
|
431
|
+
pSpace = Pattern.compile("[\\s+]");
|
432
|
+
sentenceTokens = pSpace.split(sentence);
|
433
|
+
StringBuilder sb4 = new StringBuilder();
|
434
|
+
|
435
|
+
// Check for [POSP]
|
436
|
+
for (int i = sentenceTokens.length - 1; i > 0; i--) {
|
437
|
+
sb4.insert(0, sentenceTokens[i] + " ");
|
438
|
+
if (sentenceTokens[i].trim().startsWith("[POSP]")) {
|
439
|
+
for (int j = i - 1; j > 0; j--) {
|
440
|
+
if (sentenceTokens[j].trim().startsWith("[CONJ]")
|
441
|
+
|| sentenceTokens[j].trim().startsWith("[PSEU]")
|
442
|
+
|| sentenceTokens[j].trim().startsWith("[PREN]")
|
443
|
+
|| sentenceTokens[j].trim().startsWith("[PREP]")
|
444
|
+
|| sentenceTokens[j].trim().startsWith("[POST]")) {
|
445
|
+
break;
|
446
|
+
}
|
447
|
+
|
448
|
+
if (sentenceTokens[j].trim().startsWith("[PHRASE]")) {
|
449
|
+
sentenceTokens[j] = sentenceTokens[j].trim().replaceAll("\\[PHRASE\\]", "[POSSIBLE]");
|
450
|
+
}
|
451
|
+
}
|
452
|
+
}
|
453
|
+
}
|
454
|
+
|
455
|
+
sentence = sb4.toString();
|
456
|
+
}
|
457
|
+
|
458
|
+
// Remove the filler character we used.
|
459
|
+
sentence = sentence.replaceAll(filler, " ");
|
460
|
+
|
461
|
+
// Remove the extra periods at the beginning
|
462
|
+
// and end of the sentence.
|
463
|
+
sentence = sentence.substring(0, sentence.trim().lastIndexOf('.'));
|
464
|
+
sentence = sentence.replaceFirst(".", "");
|
465
|
+
|
466
|
+
// Get the scope of the negation for PREN and PREP
|
467
|
+
if (sentence.contains("[PREN]") || sentence.contains("[PREP]")) {
|
468
|
+
int startOffset = sentence.indexOf("[PREN]");
|
469
|
+
if (startOffset == -1) {
|
470
|
+
startOffset = sentence.indexOf("[PREP]");
|
471
|
+
}
|
472
|
+
|
473
|
+
int endOffset = sentence.indexOf("[CONJ]");
|
474
|
+
if (endOffset == -1) {
|
475
|
+
endOffset = sentence.indexOf("[PSEU]");
|
476
|
+
}
|
477
|
+
if (endOffset == -1) {
|
478
|
+
endOffset = sentence.indexOf("[POST]");
|
479
|
+
}
|
480
|
+
if (endOffset == -1) {
|
481
|
+
endOffset = sentence.indexOf("[POSP]");
|
482
|
+
}
|
483
|
+
if (endOffset == -1 || endOffset < startOffset) {
|
484
|
+
endOffset = sentence.length() - 1;
|
485
|
+
}
|
486
|
+
sScope = sentence.substring(startOffset, endOffset + 1);
|
487
|
+
}
|
488
|
+
|
489
|
+
// Get the scope of the negation for POST and POSP
|
490
|
+
if (sentence.contains("[POST]") || sentence.contains("[POSP]")) {
|
491
|
+
int endOffset = sentence.lastIndexOf("[POST]");
|
492
|
+
if (endOffset == -1) {
|
493
|
+
endOffset = sentence.lastIndexOf("[POSP]");
|
494
|
+
}
|
495
|
+
|
496
|
+
int startOffset = sentence.lastIndexOf("[CONJ]");
|
497
|
+
if (startOffset == -1) {
|
498
|
+
startOffset = sentence.lastIndexOf("[PSEU]");
|
499
|
+
}
|
500
|
+
if (startOffset == -1) {
|
501
|
+
startOffset = sentence.lastIndexOf("[PREN]");
|
502
|
+
}
|
503
|
+
if (startOffset == -1) {
|
504
|
+
startOffset = sentence.lastIndexOf("[PREP]");
|
505
|
+
}
|
506
|
+
if (startOffset == -1) {
|
507
|
+
startOffset = 0;
|
508
|
+
}
|
509
|
+
sScope = sentence.substring(startOffset, endOffset);
|
510
|
+
}
|
511
|
+
|
512
|
+
// Classify to: negated/possible/affirmed
|
513
|
+
if (sentence.contains("[NEGATED]")) {
|
514
|
+
sentence = sentence + "\t" + "negated" + "\t" + sScope;
|
515
|
+
} else if (sentence.contains("[POSSIBLE]")) {
|
516
|
+
sentence = sentence + "\t" + "possible" + "\t" + sScope;
|
517
|
+
} else {
|
518
|
+
sentence = sentence + "\t" + "affirmed" + "\t" + sScope;
|
519
|
+
}
|
520
|
+
previousRunScope = sScope;
|
521
|
+
|
522
|
+
sToReturn = sentence;
|
523
|
+
|
524
|
+
return sToReturn;
|
525
|
+
}
|
526
|
+
|
527
|
+
public String getPreviousRunScope() {
|
528
|
+
return previousRunScope;
|
529
|
+
}
|
530
|
+
}
|