abstractor 1.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/MIT-LICENSE +20 -0
- data/README.md +14 -0
- data/Rakefile +33 -0
- data/app/assets/images/abstractor/add.png +0 -0
- data/app/assets/images/abstractor/ajax-loader.gif +0 -0
- data/app/assets/images/abstractor/bar.gif +0 -0
- data/app/assets/images/abstractor/bti_logo.jpg +0 -0
- data/app/assets/images/abstractor/bullet_arrow_down.png +0 -0
- data/app/assets/images/abstractor/bullet_arrow_up.png +0 -0
- data/app/assets/images/abstractor/cog.png +0 -0
- data/app/assets/images/abstractor/delete.png +0 -0
- data/app/assets/images/abstractor/edit.png +0 -0
- data/app/assets/images/abstractor/excel.png +0 -0
- data/app/assets/images/abstractor/favicon.ico +0 -0
- data/app/assets/images/abstractor/greencheck.gif +0 -0
- data/app/assets/images/abstractor/loading.gif +0 -0
- data/app/assets/images/abstractor/nu_logo.jpg +0 -0
- data/app/assets/images/abstractor/nubic_logo.png +0 -0
- data/app/assets/images/abstractor/page.png +0 -0
- data/app/assets/images/abstractor/rails.png +0 -0
- data/app/assets/images/abstractor/robert_h_lurie.jpg +0 -0
- data/app/assets/images/abstractor/show.png +0 -0
- data/app/assets/images/abstractor/switch_minus.gif +0 -0
- data/app/assets/images/abstractor/switch_plus.gif +0 -0
- data/app/assets/javascripts/abstractor/abstractor.js +89 -0
- data/app/assets/javascripts/abstractor/application.js +21 -0
- data/app/assets/javascripts/abstractor/combobox.js +301 -0
- data/app/assets/javascripts/abstractor/jquery/jquery.form.js +1074 -0
- data/app/assets/javascripts/abstractor/nested_attributes.js +69 -0
- data/app/assets/stylesheets/abstractor/abstractor_abstractions.css +312 -0
- data/app/assets/stylesheets/abstractor/application.css +21 -0
- data/app/assets/stylesheets/abstractor/jquery.ui.override.css.scss +73 -0
- data/app/controllers/abstractor/abstractor_abstraction_groups_controller.rb +5 -0
- data/app/controllers/abstractor/abstractor_abstractions_controller.rb +5 -0
- data/app/controllers/abstractor/abstractor_suggestions_controller.rb +5 -0
- data/app/controllers/abstractor/application_controller.rb +2 -0
- data/app/helpers/abstractor/application_helper.rb +4 -0
- data/app/models/abstractor/abstractor_abstraction.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_group.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_group_member.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_schema.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_schema_object_value.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_schema_predicate_variant.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_schema_relation.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_source.rb +6 -0
- data/app/models/abstractor/abstractor_object_type.rb +6 -0
- data/app/models/abstractor/abstractor_object_value.rb +6 -0
- data/app/models/abstractor/abstractor_object_value_variant.rb +6 -0
- data/app/models/abstractor/abstractor_relation_type.rb +6 -0
- data/app/models/abstractor/abstractor_rule_type.rb +6 -0
- data/app/models/abstractor/abstractor_subject.rb +6 -0
- data/app/models/abstractor/abstractor_subject_group.rb +6 -0
- data/app/models/abstractor/abstractor_subject_group_member.rb +6 -0
- data/app/models/abstractor/abstractor_subject_relation.rb +6 -0
- data/app/models/abstractor/abstractor_suggestion.rb +6 -0
- data/app/models/abstractor/abstractor_suggestion_object_value.rb +6 -0
- data/app/models/abstractor/abstractor_suggestion_source.rb +6 -0
- data/app/models/abstractor/abstractor_suggestion_status.rb +6 -0
- data/app/views/abstractor/abstractor_abstraction_groups/_form.html.haml +10 -0
- data/app/views/abstractor/abstractor_abstraction_groups/edit.html.haml +1 -0
- data/app/views/abstractor/abstractor_abstractions/_fields.html.haml +63 -0
- data/app/views/abstractor/abstractor_abstractions/_list.html.haml +45 -0
- data/app/views/abstractor/abstractor_abstractions/edit.html.haml +53 -0
- data/app/views/abstractor/abstractor_abstractions/show.html.haml +1 -0
- data/app/views/abstractor/shared/_error_messages.html.haml +5 -0
- data/config/cucumber.yml +8 -0
- data/config/routes.rb +7 -0
- data/db/migrate/20131227205140_create_abstractor_object_types.rb +10 -0
- data/db/migrate/20131227205219_create_abstractor_object_values.rb +10 -0
- data/db/migrate/20131227205256_create_abstractor_object_value_variants.rb +11 -0
- data/db/migrate/20131227205320_create_abstractor_relation_types.rb +10 -0
- data/db/migrate/20131227205354_create_abstractor_rule_types.rb +11 -0
- data/db/migrate/20131227205432_create_abstractor_abstraction_schemas.rb +13 -0
- data/db/migrate/20131227205456_create_abstractor_abstraction_schema_object_values.rb +10 -0
- data/db/migrate/20131227205529_create_abstractor_abstraction_schema_predicate_variants.rb +11 -0
- data/db/migrate/20131227205610_create_abstractor_abstraction_schema_relations.rb +11 -0
- data/db/migrate/20131227205652_create_abstractor_subjects.rb +12 -0
- data/db/migrate/20131227205732_create_abstractor_subject_relations.rb +11 -0
- data/db/migrate/20131227205831_create_abstractor_abstraction_sources.rb +11 -0
- data/db/migrate/20131227210211_create_abstractor_subject_groups.rb +10 -0
- data/db/migrate/20131227210244_create_abstractor_subject_group_members.rb +11 -0
- data/db/migrate/20131227210350_create_abstractor_abstraction_groups.rb +12 -0
- data/db/migrate/20131227210353_create_abstractor_abstraction_group_members.rb +11 -0
- data/db/migrate/20131227211050_create_abstractor_suggestion_statuses.rb +10 -0
- data/db/migrate/20131227211303_create_abstractor_abstractions.rb +15 -0
- data/db/migrate/20131227213427_create_abstractor_suggestions.rb +14 -0
- data/db/migrate/20131228041944_create_abstractor_suggestion_object_values.rb +11 -0
- data/db/migrate/20131228041945_create_abstractor_suggestion_sources.rb +16 -0
- data/db/seeds.rb +0 -0
- data/lib/abstractor.rb +8 -0
- data/lib/abstractor/abstractable.rb +190 -0
- data/lib/abstractor/core_ext/string.rb +99 -0
- data/lib/abstractor/engine.rb +14 -0
- data/lib/abstractor/methods/controllers/abstractor_abstraction_groups_controller.rb +37 -0
- data/lib/abstractor/methods/controllers/abstractor_abstractions_controller.rb +42 -0
- data/lib/abstractor/methods/controllers/abstractor_suggestions_controller.rb +28 -0
- data/lib/abstractor/methods/models/abstractor_abstraction.rb +65 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_group.rb +37 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_group_member.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_schema.rb +27 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_schema_object_value.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_schema_predicate_variant.rb +16 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_schema_relation.rb +18 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_source.rb +29 -0
- data/lib/abstractor/methods/models/abstractor_object_type.rb +16 -0
- data/lib/abstractor/methods/models/abstractor_object_value.rb +24 -0
- data/lib/abstractor/methods/models/abstractor_object_value_variant.rb +16 -0
- data/lib/abstractor/methods/models/abstractor_relation_type.rb +16 -0
- data/lib/abstractor/methods/models/abstractor_rule_type.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_subject.rb +258 -0
- data/lib/abstractor/methods/models/abstractor_subject_group.rb +19 -0
- data/lib/abstractor/methods/models/abstractor_subject_group_member.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_subject_relation.rb +18 -0
- data/lib/abstractor/methods/models/abstractor_suggestion.rb +88 -0
- data/lib/abstractor/methods/models/abstractor_suggestion_object_value.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_suggestion_source.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_suggestion_status.rb +29 -0
- data/lib/abstractor/methods/models/soft_delete.rb +35 -0
- data/lib/abstractor/negation_detection.rb +43 -0
- data/lib/abstractor/parser.rb +76 -0
- data/lib/abstractor/setup.rb +24 -0
- data/lib/abstractor/user_interface.rb +40 -0
- data/lib/abstractor/utility.rb +8 -0
- data/lib/abstractor/version.rb +3 -0
- data/lib/generators/abstractor/install/install_generator.rb +118 -0
- data/lib/generators/abstractor/install/templates/controllers/abstractor_abstraction_groups_controller.rb +16 -0
- data/lib/generators/abstractor/install/templates/controllers/abstractor_abstractions_controller.rb +24 -0
- data/lib/generators/abstractor/install/templates/controllers/abstractor_suggestions_controller.rb +12 -0
- data/lib/generators/abstractor/install/templates/dictionaries.yml +235 -0
- data/lib/generators/abstractor/install/templates/helpers/abstractions_helper.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group_member.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_object_value.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_predicate_variant.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_relation.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_source.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_object_type.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_object_value.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_object_value_variant.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_relation_type.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_rule_type.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_subject.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_subject_group.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_subject_group_member.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_subject_relation.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_suggestion.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_object_value.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_source.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_status.rb +9 -0
- data/lib/lingscope/build.xml +74 -0
- data/lib/lingscope/build/built-jar.properties +4 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/AbnerTokenizer.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/AnnotationComparer.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/Annotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/BaselineAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/BaselineCueAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/BaselineScopeAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/CrfAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/NegexAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/NegexCueAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/NegexScopeAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/PosTagger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/negex/GenNegEx.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/negex/Sorter.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/AnnotatedFilesMerger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/AnnotationComparerDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/BaselineDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/CrfDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/CueAndPosFilesMerger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/ModelTrainer.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/NegexDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/PosTaggerDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/SentencePosTagger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/SentenceTagger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/io/AnnotatedSentencesIO.class +0 -0
- data/lib/lingscope/build/classes/lingscope/structures/AnnotatedSentence.class +0 -0
- data/lib/lingscope/dist/README.TXT +32 -0
- data/lib/lingscope/dist/javadoc/allclasses-frame.html +80 -0
- data/lib/lingscope/dist/javadoc/allclasses-noframe.html +80 -0
- data/lib/lingscope/dist/javadoc/constant-values.html +199 -0
- data/lib/lingscope/dist/javadoc/deprecated-list.html +147 -0
- data/lib/lingscope/dist/javadoc/help-doc.html +224 -0
- data/lib/lingscope/dist/javadoc/index-files/index-1.html +188 -0
- data/lib/lingscope/dist/javadoc/index-files/index-10.html +149 -0
- data/lib/lingscope/dist/javadoc/index-files/index-11.html +158 -0
- data/lib/lingscope/dist/javadoc/index-files/index-12.html +157 -0
- data/lib/lingscope/dist/javadoc/index-files/index-13.html +177 -0
- data/lib/lingscope/dist/javadoc/index-files/index-14.html +155 -0
- data/lib/lingscope/dist/javadoc/index-files/index-15.html +152 -0
- data/lib/lingscope/dist/javadoc/index-files/index-16.html +146 -0
- data/lib/lingscope/dist/javadoc/index-files/index-2.html +158 -0
- data/lib/lingscope/dist/javadoc/index-files/index-3.html +165 -0
- data/lib/lingscope/dist/javadoc/index-files/index-4.html +146 -0
- data/lib/lingscope/dist/javadoc/index-files/index-5.html +219 -0
- data/lib/lingscope/dist/javadoc/index-files/index-6.html +149 -0
- data/lib/lingscope/dist/javadoc/index-files/index-7.html +155 -0
- data/lib/lingscope/dist/javadoc/index-files/index-8.html +185 -0
- data/lib/lingscope/dist/javadoc/index-files/index-9.html +164 -0
- data/lib/lingscope/dist/javadoc/index.html +74 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/AbnerTokenizer.html +280 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/AnnotationComparer.html +526 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/Annotator.html +401 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineAnnotator.html +375 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineCueAnnotator.html +309 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineScopeAnnotator.html +340 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/CrfAnnotator.html +340 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexAnnotator.html +364 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexCueAnnotator.html +309 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexScopeAnnotator.html +309 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/PosTagger.html +268 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AbnerTokenizer.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AnnotationComparer.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/Annotator.html +299 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineAnnotator.html +189 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineCueAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineScopeAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/CrfAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexAnnotator.html +189 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexCueAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexScopeAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/PosTagger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/GenNegEx.html +369 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/Sorter.html +253 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/GenNegEx.html +181 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/Sorter.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-frame.html +35 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-summary.html +165 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-tree.html +154 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-use.html +174 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-frame.html +53 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-summary.html +198 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-tree.html +160 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-use.html +202 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotatedFilesMerger.html +284 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotationComparerDriver.html +258 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/BaselineDriver.html +260 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/CrfDriver.html +262 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/CueAndPosFilesMerger.html +310 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/ModelTrainer.html +281 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/NegexDriver.html +262 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/PosTaggerDriver.html +289 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/SentencePosTagger.html +313 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/SentenceTagger.html +512 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotatedFilesMerger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotationComparerDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/BaselineDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CrfDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CueAndPosFilesMerger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/ModelTrainer.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/NegexDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/PosTaggerDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentencePosTagger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentenceTagger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/package-frame.html +51 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/package-summary.html +195 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/package-tree.html +154 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/package-use.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/AnnotatedSentencesIO.html +285 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/class-use/AnnotatedSentencesIO.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/package-frame.html +33 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/package-summary.html +158 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/package-tree.html +154 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/package-use.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/AnnotatedSentence.html +425 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/class-use/AnnotatedSentence.html +423 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/package-frame.html +33 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/package-summary.html +158 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/package-tree.html +154 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/package-use.html +209 -0
- data/lib/lingscope/dist/javadoc/overview-frame.html +51 -0
- data/lib/lingscope/dist/javadoc/overview-summary.html +168 -0
- data/lib/lingscope/dist/javadoc/overview-tree.html +159 -0
- data/lib/lingscope/dist/javadoc/package-list +5 -0
- data/lib/lingscope/dist/javadoc/resources/inherit.gif +0 -0
- data/lib/lingscope/dist/javadoc/stylesheet.css +29 -0
- data/lib/lingscope/dist/lib/abner.jar +0 -0
- data/lib/lingscope/dist/lib/commons-lang-2.4.jar +0 -0
- data/lib/lingscope/dist/lib/java_utils.jar +0 -0
- data/lib/lingscope/dist/lib/sptoolkit.jar +0 -0
- data/lib/lingscope/dist/lib/stanford-postagger.jar +0 -0
- data/lib/lingscope/dist/lingscope.jar +0 -0
- data/lib/lingscope/lingscope.zip +0 -0
- data/lib/lingscope/manifest.mf +3 -0
- data/lib/lingscope/nbproject/build-impl.xml +1338 -0
- data/lib/lingscope/nbproject/genfiles.properties +8 -0
- data/lib/lingscope/nbproject/private/config.properties +0 -0
- data/lib/lingscope/nbproject/private/private.properties +10 -0
- data/lib/lingscope/nbproject/private/private.xml +5 -0
- data/lib/lingscope/nbproject/project.properties +85 -0
- data/lib/lingscope/nbproject/project.xml +15 -0
- data/lib/lingscope/negation_models.zip +0 -0
- data/lib/lingscope/negation_models/baseline_cue_all_both.model +49 -0
- data/lib/lingscope/negation_models/baseline_cue_all_clinical.model +19 -0
- data/lib/lingscope/negation_models/crf_cue_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_cue_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_cue_crf_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_cue_crf_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_cue_regex_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_cue_regex_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_crf_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_crf_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_regex_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_regex_all_clinical.model +0 -0
- data/lib/lingscope/src/lingscope/algorithms/AbnerTokenizer.java +58 -0
- data/lib/lingscope/src/lingscope/algorithms/AnnotationComparer.java +207 -0
- data/lib/lingscope/src/lingscope/algorithms/Annotator.java +51 -0
- data/lib/lingscope/src/lingscope/algorithms/BaselineAnnotator.java +80 -0
- data/lib/lingscope/src/lingscope/algorithms/BaselineCueAnnotator.java +84 -0
- data/lib/lingscope/src/lingscope/algorithms/BaselineScopeAnnotator.java +101 -0
- data/lib/lingscope/src/lingscope/algorithms/CrfAnnotator.java +45 -0
- data/lib/lingscope/src/lingscope/algorithms/NegexAnnotator.java +52 -0
- data/lib/lingscope/src/lingscope/algorithms/NegexCueAnnotator.java +26 -0
- data/lib/lingscope/src/lingscope/algorithms/NegexScopeAnnotator.java +26 -0
- data/lib/lingscope/src/lingscope/algorithms/PosTagger.java +54 -0
- data/lib/lingscope/src/lingscope/algorithms/negex/GenNegEx.java +530 -0
- data/lib/lingscope/src/lingscope/algorithms/negex/Sorter.java +36 -0
- data/lib/lingscope/src/lingscope/drivers/AnnotatedFilesMerger.java +61 -0
- data/lib/lingscope/src/lingscope/drivers/AnnotationComparerDriver.java +22 -0
- data/lib/lingscope/src/lingscope/drivers/BaselineDriver.java +45 -0
- data/lib/lingscope/src/lingscope/drivers/CrfDriver.java +31 -0
- data/lib/lingscope/src/lingscope/drivers/CueAndPosFilesMerger.java +86 -0
- data/lib/lingscope/src/lingscope/drivers/ModelTrainer.java +39 -0
- data/lib/lingscope/src/lingscope/drivers/NegexDriver.java +32 -0
- data/lib/lingscope/src/lingscope/drivers/PosTaggerDriver.java +62 -0
- data/lib/lingscope/src/lingscope/drivers/SentencePosTagger.java +89 -0
- data/lib/lingscope/src/lingscope/drivers/SentenceTagger.java +158 -0
- data/lib/lingscope/src/lingscope/io/AnnotatedSentencesIO.java +53 -0
- data/lib/lingscope/src/lingscope/structures/AnnotatedSentence.java +105 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.csv +1 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.txt +2273 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.csv +1 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.txt +1342 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/icd-o3 readme.txt +21 -0
- data/lib/setup/data/custom_site_synonyms.csv +1 -0
- data/lib/setup/data/diagnosis_categorizations.csv +1 -0
- data/lib/setup/data/high_level_primary_cns_diagnosis_categorizations.yml +597 -0
- data/lib/setup/data/icdo3_sites_with_laterality.txt +11 -0
- data/lib/setup/data/site_site_categories.txt +28 -0
- data/lib/tasks/abstractor_tasks.rake +22 -0
- data/lib/tasks/cucumber.rake +65 -0
- metadata +754 -0
@@ -0,0 +1,51 @@
|
|
1
|
+
package lingscope.algorithms;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import lingscope.structures.AnnotatedSentence;
|
5
|
+
|
6
|
+
/**
|
7
|
+
*
|
8
|
+
* @author shashank
|
9
|
+
*/
|
10
|
+
public abstract class Annotator {
|
11
|
+
|
12
|
+
protected String beginTag;
|
13
|
+
protected String interTag;
|
14
|
+
protected String otherTag;
|
15
|
+
|
16
|
+
public Annotator(String beginTag, String interTag, String otherTag) {
|
17
|
+
this.beginTag = beginTag;
|
18
|
+
this.interTag = interTag;
|
19
|
+
this.otherTag = otherTag;
|
20
|
+
}
|
21
|
+
|
22
|
+
public abstract void serializeAnnotator(String trainingFile, String modelFile);
|
23
|
+
|
24
|
+
public abstract AnnotatedSentence annotateSentence(String sentence, boolean isTokenized);
|
25
|
+
|
26
|
+
public abstract void loadAnnotator(String modelFile);
|
27
|
+
|
28
|
+
/**
|
29
|
+
* Checks if the given target phrase is negated in the given sentence. Only
|
30
|
+
* the first word of the target phrase is used
|
31
|
+
* @param sentence
|
32
|
+
* @param isTokenized
|
33
|
+
* @param targetPhrase
|
34
|
+
* @return
|
35
|
+
*/
|
36
|
+
public boolean isTargetNegated(String sentence, boolean isTokenized, String targetPhrase) {
|
37
|
+
AnnotatedSentence annotatedSentence = annotateSentence(sentence, isTokenized);
|
38
|
+
String[] targetPhraseWords = targetPhrase.split("\\s+");
|
39
|
+
List<String> words = annotatedSentence.getWords();
|
40
|
+
List<Boolean> areNegated = annotatedSentence.getIsAnnotatedTags();
|
41
|
+
int index = 0;
|
42
|
+
for (String word : words) {
|
43
|
+
if (targetPhraseWords[0].equalsIgnoreCase(word)) {
|
44
|
+
return areNegated.get(index);
|
45
|
+
}
|
46
|
+
++index;
|
47
|
+
}
|
48
|
+
System.err.println("Phrase not found: " + targetPhrase);
|
49
|
+
return false;
|
50
|
+
}
|
51
|
+
}
|
@@ -0,0 +1,80 @@
|
|
1
|
+
package lingscope.algorithms;
|
2
|
+
|
3
|
+
import generalutils.FileOperations;
|
4
|
+
import java.util.ArrayList;
|
5
|
+
import java.util.HashSet;
|
6
|
+
import java.util.List;
|
7
|
+
import java.util.Set;
|
8
|
+
import java.util.logging.Level;
|
9
|
+
import java.util.logging.Logger;
|
10
|
+
|
11
|
+
/**
|
12
|
+
* The baseline annotator
|
13
|
+
* @author shashank
|
14
|
+
*/
|
15
|
+
public abstract class BaselineAnnotator extends Annotator {
|
16
|
+
|
17
|
+
protected Set<String> phrases;
|
18
|
+
|
19
|
+
public BaselineAnnotator(String beginTag, String interTag, String otherTag) {
|
20
|
+
super(beginTag, interTag, otherTag);
|
21
|
+
phrases = null;
|
22
|
+
}
|
23
|
+
|
24
|
+
@Override
|
25
|
+
public void serializeAnnotator(String trainingFile, String modelFile) {
|
26
|
+
try {
|
27
|
+
phrases = new HashSet<String>();
|
28
|
+
List<String> taggedSentences = FileOperations.readFile(trainingFile);
|
29
|
+
for (String taggedSentence : taggedSentences) {
|
30
|
+
phrases.addAll(getCueWords(taggedSentence, beginTag, interTag, otherTag));
|
31
|
+
}
|
32
|
+
FileOperations.writeFile(modelFile, new ArrayList<String>(phrases));
|
33
|
+
} catch (Exception ex) {
|
34
|
+
Logger.getLogger(BaselineAnnotator.class.getName()).log(Level.SEVERE, null, ex);
|
35
|
+
}
|
36
|
+
}
|
37
|
+
|
38
|
+
@Override
|
39
|
+
public void loadAnnotator(String modelFile) {
|
40
|
+
try {
|
41
|
+
phrases = new HashSet<String>();
|
42
|
+
phrases.addAll(FileOperations.readFile(modelFile));
|
43
|
+
} catch (Exception ex) {
|
44
|
+
Logger.getLogger(BaselineAnnotator.class.getName()).log(Level.SEVERE, null, ex);
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
48
|
+
/**
|
49
|
+
* Gets the set of cue word phrases in the given sentence. The given
|
50
|
+
* sentence is tagged according to Abner's specifications
|
51
|
+
* @param abnerTaggedSentence sentence tagged by abner's specification.
|
52
|
+
* @param beginTag the tag to mark the beginning of the cue
|
53
|
+
* @param intermediateTag the tag to mark intermediate portions
|
54
|
+
* @param otherTag the other tag
|
55
|
+
* @return the set of cue words or phrases in the given sentence
|
56
|
+
*/
|
57
|
+
public static Set<String> getCueWords(String abnerTaggedSentence, String beginTag, String intermediateTag, String otherTag) {
|
58
|
+
Set<String> cueWordsPhrases = new HashSet<String>(1);
|
59
|
+
String[] elements = abnerTaggedSentence.split(" +");
|
60
|
+
boolean collect = false;
|
61
|
+
StringBuilder collectedPhrase = new StringBuilder();
|
62
|
+
for (String element : elements) {
|
63
|
+
String[] elementTokens = element.split("\\|");
|
64
|
+
String word = elementTokens[0];
|
65
|
+
String tag = elementTokens[1];
|
66
|
+
if (tag.equalsIgnoreCase(beginTag)) {
|
67
|
+
collect = true;
|
68
|
+
collectedPhrase.append(word).append(" ");
|
69
|
+
} else if (tag.equalsIgnoreCase(intermediateTag)) {
|
70
|
+
collectedPhrase.append(word).append(" ");
|
71
|
+
} else if (tag.equalsIgnoreCase(otherTag) && collect) {
|
72
|
+
collect = false;
|
73
|
+
cueWordsPhrases.add(collectedPhrase.toString().trim().toLowerCase());
|
74
|
+
collectedPhrase.delete(0, collectedPhrase.length() - 1);
|
75
|
+
}
|
76
|
+
}
|
77
|
+
return cueWordsPhrases;
|
78
|
+
}
|
79
|
+
|
80
|
+
}
|
@@ -0,0 +1,84 @@
|
|
1
|
+
package lingscope.algorithms;
|
2
|
+
|
3
|
+
import java.util.HashSet;
|
4
|
+
import java.util.Set;
|
5
|
+
import lingscope.structures.AnnotatedSentence;
|
6
|
+
|
7
|
+
/**
|
8
|
+
*
|
9
|
+
* @author shashank
|
10
|
+
*/
|
11
|
+
public class BaselineCueAnnotator extends BaselineAnnotator {
|
12
|
+
|
13
|
+
public BaselineCueAnnotator(String beginTag, String interTag, String otherTag) {
|
14
|
+
super(beginTag, interTag, otherTag);
|
15
|
+
}
|
16
|
+
|
17
|
+
@Override
|
18
|
+
public AnnotatedSentence annotateSentence(String sentence, boolean isTokenized) {
|
19
|
+
if (phrases == null) {
|
20
|
+
throw new RuntimeException("Annotator has not been loaded");
|
21
|
+
}
|
22
|
+
if (!isTokenized) {
|
23
|
+
sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
|
24
|
+
}
|
25
|
+
String lcSentence = sentence.toLowerCase();
|
26
|
+
String[] words = sentence.split(" +");
|
27
|
+
int wordsLength = words.length;
|
28
|
+
|
29
|
+
Set<Integer> addITag = new HashSet<Integer>();
|
30
|
+
Set<Integer> addBTag = new HashSet<Integer>();
|
31
|
+
|
32
|
+
// Collect all indices where beginning and intermediate tags should
|
33
|
+
// be added
|
34
|
+
for (String phrase : phrases) {
|
35
|
+
if (!lcSentence.contains(phrase)) {
|
36
|
+
continue;
|
37
|
+
}
|
38
|
+
String[] phraseWords = phrase.split(" +");
|
39
|
+
|
40
|
+
for (int wordCounter = 0; wordCounter < wordsLength; ++wordCounter) {
|
41
|
+
String word = words[wordCounter];
|
42
|
+
if (word.equalsIgnoreCase(phraseWords[0])) {
|
43
|
+
boolean phraseMatches = true;
|
44
|
+
for (int j = 0; j < phraseWords.length; ++j) {
|
45
|
+
int i = j + wordCounter;
|
46
|
+
if (i == wordsLength) {
|
47
|
+
phraseMatches = false;
|
48
|
+
break;
|
49
|
+
}
|
50
|
+
if (!phraseWords[j].equalsIgnoreCase(words[i])) {
|
51
|
+
phraseMatches = false;
|
52
|
+
break;
|
53
|
+
}
|
54
|
+
}
|
55
|
+
|
56
|
+
if (phraseMatches) {
|
57
|
+
addBTag.add(wordCounter);
|
58
|
+
for (int j = 1; j < phraseWords.length; ++j) {
|
59
|
+
addITag.add(j + wordCounter);
|
60
|
+
}
|
61
|
+
}
|
62
|
+
}
|
63
|
+
}
|
64
|
+
}
|
65
|
+
|
66
|
+
// Create a tagged sentence. Give preference to beginning tag over
|
67
|
+
// intermediate tag in case they clash
|
68
|
+
StringBuilder taggedSentence = new StringBuilder();
|
69
|
+
for (int i = 0; i < wordsLength; ++i) {
|
70
|
+
String word = words[i];
|
71
|
+
if (addBTag.contains(i)) {
|
72
|
+
taggedSentence.append(" ").append(word).append("|").append(beginTag);
|
73
|
+
} else if (addITag.contains(i)) {
|
74
|
+
taggedSentence.append(" ").append(word).append("|").append(interTag);
|
75
|
+
} else {
|
76
|
+
taggedSentence.append(" ").append(word).append("|").append(otherTag);
|
77
|
+
}
|
78
|
+
}
|
79
|
+
|
80
|
+
String raw = taggedSentence.substring(1);
|
81
|
+
return new AnnotatedSentence(raw);
|
82
|
+
}
|
83
|
+
|
84
|
+
}
|
@@ -0,0 +1,101 @@
|
|
1
|
+
package lingscope.algorithms;
|
2
|
+
|
3
|
+
import java.util.HashSet;
|
4
|
+
import java.util.Set;
|
5
|
+
import java.util.regex.Matcher;
|
6
|
+
import java.util.regex.Pattern;
|
7
|
+
import lingscope.structures.AnnotatedSentence;
|
8
|
+
|
9
|
+
/**
|
10
|
+
*
|
11
|
+
* @author shashank
|
12
|
+
*/
|
13
|
+
public class BaselineScopeAnnotator extends BaselineAnnotator {
|
14
|
+
|
15
|
+
protected Pattern endPattern;
|
16
|
+
|
17
|
+
public BaselineScopeAnnotator(String beginTag, String interTag, String otherTag, boolean commaLimit, boolean periodLimit) {
|
18
|
+
super(beginTag, interTag, otherTag);
|
19
|
+
String endPatternString = "";
|
20
|
+
if (commaLimit && periodLimit) {
|
21
|
+
endPatternString = ",|\\.";
|
22
|
+
} else if (commaLimit) {
|
23
|
+
endPatternString = ",";
|
24
|
+
} else if (periodLimit) {
|
25
|
+
endPatternString = "\\.";
|
26
|
+
}
|
27
|
+
endPattern = Pattern.compile(endPatternString); // End tagging if a comma or period is seen
|
28
|
+
|
29
|
+
}
|
30
|
+
|
31
|
+
@Override
|
32
|
+
public AnnotatedSentence annotateSentence(String sentence, boolean isTokenized) {
|
33
|
+
if (phrases == null) {
|
34
|
+
throw new RuntimeException("Annotator has not been loaded");
|
35
|
+
}
|
36
|
+
if (!isTokenized) {
|
37
|
+
sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
|
38
|
+
}
|
39
|
+
String lcSentence = sentence.toLowerCase();
|
40
|
+
String[] words = sentence.split(" +");
|
41
|
+
int wordsLength = words.length;
|
42
|
+
|
43
|
+
Set<Integer> addBTag = new HashSet<Integer>();
|
44
|
+
|
45
|
+
// Collect all indices where beginning and intermediate tags should
|
46
|
+
// be added
|
47
|
+
for (String phrase : phrases) {
|
48
|
+
if (!lcSentence.contains(phrase)) {
|
49
|
+
continue;
|
50
|
+
}
|
51
|
+
String[] phraseWords = phrase.split(" +");
|
52
|
+
|
53
|
+
for (int wordCounter = 0; wordCounter < wordsLength; ++wordCounter) {
|
54
|
+
String word = words[wordCounter];
|
55
|
+
if (word.equalsIgnoreCase(phraseWords[0])) {
|
56
|
+
boolean phraseMatches = true;
|
57
|
+
for (int j = 0; j < phraseWords.length; ++j) {
|
58
|
+
int i = j + wordCounter;
|
59
|
+
if (i == wordsLength) {
|
60
|
+
phraseMatches = false;
|
61
|
+
break;
|
62
|
+
}
|
63
|
+
if (!phraseWords[j].equalsIgnoreCase(words[i])) {
|
64
|
+
phraseMatches = false;
|
65
|
+
break;
|
66
|
+
}
|
67
|
+
}
|
68
|
+
|
69
|
+
if (phraseMatches) {
|
70
|
+
addBTag.add(wordCounter);
|
71
|
+
}
|
72
|
+
}
|
73
|
+
}
|
74
|
+
}
|
75
|
+
|
76
|
+
// Create a tagged sentence. Give preference to beginning tag over
|
77
|
+
// intermediate tag in case they clash
|
78
|
+
StringBuilder taggedSentence = new StringBuilder();
|
79
|
+
boolean taggerOn = false;
|
80
|
+
for (int i = 0; i < wordsLength; ++i) {
|
81
|
+
String word = words[i];
|
82
|
+
if (addBTag.contains(i)) {
|
83
|
+
taggedSentence.append(" ").append(word).append("|").append(beginTag);
|
84
|
+
taggerOn = true;
|
85
|
+
} else if (taggerOn) {
|
86
|
+
Matcher endMatch = endPattern.matcher(word);
|
87
|
+
if (endMatch.matches()) {
|
88
|
+
taggedSentence.append(" ").append(word).append("|").append(otherTag);
|
89
|
+
taggerOn = false;
|
90
|
+
} else {
|
91
|
+
taggedSentence.append(" ").append(word).append("|").append(interTag);
|
92
|
+
}
|
93
|
+
} else {
|
94
|
+
taggedSentence.append(" ").append(word).append("|").append(otherTag);
|
95
|
+
}
|
96
|
+
}
|
97
|
+
|
98
|
+
String raw = taggedSentence.substring(1);
|
99
|
+
return new AnnotatedSentence(raw);
|
100
|
+
}
|
101
|
+
}
|
@@ -0,0 +1,45 @@
|
|
1
|
+
package lingscope.algorithms;
|
2
|
+
|
3
|
+
import abner.Tagger;
|
4
|
+
import abner.Trainer;
|
5
|
+
import java.io.File;
|
6
|
+
import lingscope.structures.AnnotatedSentence;
|
7
|
+
|
8
|
+
/**
|
9
|
+
* A CRF based annotator
|
10
|
+
* @author shashank
|
11
|
+
*/
|
12
|
+
public class CrfAnnotator extends Annotator {
|
13
|
+
|
14
|
+
private Tagger tagger;
|
15
|
+
|
16
|
+
public CrfAnnotator(String beginTag, String interTag, String otherTag) {
|
17
|
+
super(beginTag, interTag, otherTag);
|
18
|
+
tagger = null;
|
19
|
+
}
|
20
|
+
|
21
|
+
@Override
|
22
|
+
public void serializeAnnotator(String trainingFile, String modelFile) {
|
23
|
+
Trainer trainer = new Trainer();
|
24
|
+
trainer.train(trainingFile, modelFile);
|
25
|
+
loadAnnotator(modelFile);
|
26
|
+
}
|
27
|
+
|
28
|
+
@Override
|
29
|
+
public AnnotatedSentence annotateSentence(String sentence, boolean isTokenized) {
|
30
|
+
if (tagger == null) {
|
31
|
+
throw new RuntimeException("Tagger has not been loaded");
|
32
|
+
}
|
33
|
+
if (!isTokenized) {
|
34
|
+
sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
|
35
|
+
}
|
36
|
+
String raw = tagger.tagABNER(sentence).trim();
|
37
|
+
return new AnnotatedSentence(raw);
|
38
|
+
}
|
39
|
+
|
40
|
+
@Override
|
41
|
+
public void loadAnnotator(String modelFile) {
|
42
|
+
tagger = new Tagger(new File(modelFile));
|
43
|
+
tagger.setTokenization(false);
|
44
|
+
}
|
45
|
+
}
|
@@ -0,0 +1,52 @@
|
|
1
|
+
/*
|
2
|
+
* To change this template, choose Tools | Templates
|
3
|
+
* and open the template in the editor.
|
4
|
+
*/
|
5
|
+
|
6
|
+
package lingscope.algorithms;
|
7
|
+
|
8
|
+
import java.io.File;
|
9
|
+
import java.io.FileNotFoundException;
|
10
|
+
import java.util.ArrayList;
|
11
|
+
import java.util.Scanner;
|
12
|
+
import java.util.Set;
|
13
|
+
import java.util.logging.Level;
|
14
|
+
import java.util.logging.Logger;
|
15
|
+
import lingscope.algorithms.negex.GenNegEx;
|
16
|
+
import lingscope.structures.AnnotatedSentence;
|
17
|
+
|
18
|
+
/**
|
19
|
+
* Annotates negation using Negex
|
20
|
+
* @author shashank
|
21
|
+
*/
|
22
|
+
public abstract class NegexAnnotator extends Annotator {
|
23
|
+
|
24
|
+
protected GenNegEx negex;
|
25
|
+
protected ArrayList<String> rules;
|
26
|
+
|
27
|
+
public NegexAnnotator(String beginTag, String interTag, String otherTag) {
|
28
|
+
super(beginTag, interTag, otherTag);
|
29
|
+
negex = null;
|
30
|
+
}
|
31
|
+
|
32
|
+
@Override
|
33
|
+
public void serializeAnnotator(String trainingFile, String modelFile) {
|
34
|
+
throw new UnsupportedOperationException("NegEx's serialized version can be downloaded from the internet.");
|
35
|
+
}
|
36
|
+
|
37
|
+
@Override
|
38
|
+
public void loadAnnotator(String modelFile) {
|
39
|
+
try {
|
40
|
+
negex = new GenNegEx();
|
41
|
+
File ruleFile = new File(modelFile);
|
42
|
+
Scanner sc = new Scanner(ruleFile);
|
43
|
+
rules = new ArrayList();
|
44
|
+
while (sc.hasNextLine()) {
|
45
|
+
rules.add(sc.nextLine());
|
46
|
+
}
|
47
|
+
sc.close();
|
48
|
+
} catch (FileNotFoundException ex) {
|
49
|
+
Logger.getLogger(NegexAnnotator.class.getName()).log(Level.SEVERE, null, ex);
|
50
|
+
}
|
51
|
+
}
|
52
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
package lingscope.algorithms;
|
2
|
+
|
3
|
+
import lingscope.structures.AnnotatedSentence;
|
4
|
+
|
5
|
+
/**
|
6
|
+
*
|
7
|
+
* @author shashank
|
8
|
+
*/
|
9
|
+
public class NegexCueAnnotator extends NegexAnnotator {
|
10
|
+
|
11
|
+
public NegexCueAnnotator(String beginTag, String interTag, String otherTag) {
|
12
|
+
super(beginTag, interTag, otherTag);
|
13
|
+
}
|
14
|
+
|
15
|
+
@Override
|
16
|
+
public AnnotatedSentence annotateSentence(String sentence, boolean isTokenized) {
|
17
|
+
if (negex == null) {
|
18
|
+
throw new RuntimeException("Annotator has not been loaded");
|
19
|
+
}
|
20
|
+
if (!isTokenized) {
|
21
|
+
sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
|
22
|
+
}
|
23
|
+
String raw = negex.getCue(sentence, rules, beginTag, interTag, otherTag);
|
24
|
+
return new AnnotatedSentence(raw);
|
25
|
+
}
|
26
|
+
}
|