abstractor 1.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/MIT-LICENSE +20 -0
- data/README.md +14 -0
- data/Rakefile +33 -0
- data/app/assets/images/abstractor/add.png +0 -0
- data/app/assets/images/abstractor/ajax-loader.gif +0 -0
- data/app/assets/images/abstractor/bar.gif +0 -0
- data/app/assets/images/abstractor/bti_logo.jpg +0 -0
- data/app/assets/images/abstractor/bullet_arrow_down.png +0 -0
- data/app/assets/images/abstractor/bullet_arrow_up.png +0 -0
- data/app/assets/images/abstractor/cog.png +0 -0
- data/app/assets/images/abstractor/delete.png +0 -0
- data/app/assets/images/abstractor/edit.png +0 -0
- data/app/assets/images/abstractor/excel.png +0 -0
- data/app/assets/images/abstractor/favicon.ico +0 -0
- data/app/assets/images/abstractor/greencheck.gif +0 -0
- data/app/assets/images/abstractor/loading.gif +0 -0
- data/app/assets/images/abstractor/nu_logo.jpg +0 -0
- data/app/assets/images/abstractor/nubic_logo.png +0 -0
- data/app/assets/images/abstractor/page.png +0 -0
- data/app/assets/images/abstractor/rails.png +0 -0
- data/app/assets/images/abstractor/robert_h_lurie.jpg +0 -0
- data/app/assets/images/abstractor/show.png +0 -0
- data/app/assets/images/abstractor/switch_minus.gif +0 -0
- data/app/assets/images/abstractor/switch_plus.gif +0 -0
- data/app/assets/javascripts/abstractor/abstractor.js +89 -0
- data/app/assets/javascripts/abstractor/application.js +21 -0
- data/app/assets/javascripts/abstractor/combobox.js +301 -0
- data/app/assets/javascripts/abstractor/jquery/jquery.form.js +1074 -0
- data/app/assets/javascripts/abstractor/nested_attributes.js +69 -0
- data/app/assets/stylesheets/abstractor/abstractor_abstractions.css +312 -0
- data/app/assets/stylesheets/abstractor/application.css +21 -0
- data/app/assets/stylesheets/abstractor/jquery.ui.override.css.scss +73 -0
- data/app/controllers/abstractor/abstractor_abstraction_groups_controller.rb +5 -0
- data/app/controllers/abstractor/abstractor_abstractions_controller.rb +5 -0
- data/app/controllers/abstractor/abstractor_suggestions_controller.rb +5 -0
- data/app/controllers/abstractor/application_controller.rb +2 -0
- data/app/helpers/abstractor/application_helper.rb +4 -0
- data/app/models/abstractor/abstractor_abstraction.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_group.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_group_member.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_schema.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_schema_object_value.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_schema_predicate_variant.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_schema_relation.rb +6 -0
- data/app/models/abstractor/abstractor_abstraction_source.rb +6 -0
- data/app/models/abstractor/abstractor_object_type.rb +6 -0
- data/app/models/abstractor/abstractor_object_value.rb +6 -0
- data/app/models/abstractor/abstractor_object_value_variant.rb +6 -0
- data/app/models/abstractor/abstractor_relation_type.rb +6 -0
- data/app/models/abstractor/abstractor_rule_type.rb +6 -0
- data/app/models/abstractor/abstractor_subject.rb +6 -0
- data/app/models/abstractor/abstractor_subject_group.rb +6 -0
- data/app/models/abstractor/abstractor_subject_group_member.rb +6 -0
- data/app/models/abstractor/abstractor_subject_relation.rb +6 -0
- data/app/models/abstractor/abstractor_suggestion.rb +6 -0
- data/app/models/abstractor/abstractor_suggestion_object_value.rb +6 -0
- data/app/models/abstractor/abstractor_suggestion_source.rb +6 -0
- data/app/models/abstractor/abstractor_suggestion_status.rb +6 -0
- data/app/views/abstractor/abstractor_abstraction_groups/_form.html.haml +10 -0
- data/app/views/abstractor/abstractor_abstraction_groups/edit.html.haml +1 -0
- data/app/views/abstractor/abstractor_abstractions/_fields.html.haml +63 -0
- data/app/views/abstractor/abstractor_abstractions/_list.html.haml +45 -0
- data/app/views/abstractor/abstractor_abstractions/edit.html.haml +53 -0
- data/app/views/abstractor/abstractor_abstractions/show.html.haml +1 -0
- data/app/views/abstractor/shared/_error_messages.html.haml +5 -0
- data/config/cucumber.yml +8 -0
- data/config/routes.rb +7 -0
- data/db/migrate/20131227205140_create_abstractor_object_types.rb +10 -0
- data/db/migrate/20131227205219_create_abstractor_object_values.rb +10 -0
- data/db/migrate/20131227205256_create_abstractor_object_value_variants.rb +11 -0
- data/db/migrate/20131227205320_create_abstractor_relation_types.rb +10 -0
- data/db/migrate/20131227205354_create_abstractor_rule_types.rb +11 -0
- data/db/migrate/20131227205432_create_abstractor_abstraction_schemas.rb +13 -0
- data/db/migrate/20131227205456_create_abstractor_abstraction_schema_object_values.rb +10 -0
- data/db/migrate/20131227205529_create_abstractor_abstraction_schema_predicate_variants.rb +11 -0
- data/db/migrate/20131227205610_create_abstractor_abstraction_schema_relations.rb +11 -0
- data/db/migrate/20131227205652_create_abstractor_subjects.rb +12 -0
- data/db/migrate/20131227205732_create_abstractor_subject_relations.rb +11 -0
- data/db/migrate/20131227205831_create_abstractor_abstraction_sources.rb +11 -0
- data/db/migrate/20131227210211_create_abstractor_subject_groups.rb +10 -0
- data/db/migrate/20131227210244_create_abstractor_subject_group_members.rb +11 -0
- data/db/migrate/20131227210350_create_abstractor_abstraction_groups.rb +12 -0
- data/db/migrate/20131227210353_create_abstractor_abstraction_group_members.rb +11 -0
- data/db/migrate/20131227211050_create_abstractor_suggestion_statuses.rb +10 -0
- data/db/migrate/20131227211303_create_abstractor_abstractions.rb +15 -0
- data/db/migrate/20131227213427_create_abstractor_suggestions.rb +14 -0
- data/db/migrate/20131228041944_create_abstractor_suggestion_object_values.rb +11 -0
- data/db/migrate/20131228041945_create_abstractor_suggestion_sources.rb +16 -0
- data/db/seeds.rb +0 -0
- data/lib/abstractor.rb +8 -0
- data/lib/abstractor/abstractable.rb +190 -0
- data/lib/abstractor/core_ext/string.rb +99 -0
- data/lib/abstractor/engine.rb +14 -0
- data/lib/abstractor/methods/controllers/abstractor_abstraction_groups_controller.rb +37 -0
- data/lib/abstractor/methods/controllers/abstractor_abstractions_controller.rb +42 -0
- data/lib/abstractor/methods/controllers/abstractor_suggestions_controller.rb +28 -0
- data/lib/abstractor/methods/models/abstractor_abstraction.rb +65 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_group.rb +37 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_group_member.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_schema.rb +27 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_schema_object_value.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_schema_predicate_variant.rb +16 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_schema_relation.rb +18 -0
- data/lib/abstractor/methods/models/abstractor_abstraction_source.rb +29 -0
- data/lib/abstractor/methods/models/abstractor_object_type.rb +16 -0
- data/lib/abstractor/methods/models/abstractor_object_value.rb +24 -0
- data/lib/abstractor/methods/models/abstractor_object_value_variant.rb +16 -0
- data/lib/abstractor/methods/models/abstractor_relation_type.rb +16 -0
- data/lib/abstractor/methods/models/abstractor_rule_type.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_subject.rb +258 -0
- data/lib/abstractor/methods/models/abstractor_subject_group.rb +19 -0
- data/lib/abstractor/methods/models/abstractor_subject_group_member.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_subject_relation.rb +18 -0
- data/lib/abstractor/methods/models/abstractor_suggestion.rb +88 -0
- data/lib/abstractor/methods/models/abstractor_suggestion_object_value.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_suggestion_source.rb +17 -0
- data/lib/abstractor/methods/models/abstractor_suggestion_status.rb +29 -0
- data/lib/abstractor/methods/models/soft_delete.rb +35 -0
- data/lib/abstractor/negation_detection.rb +43 -0
- data/lib/abstractor/parser.rb +76 -0
- data/lib/abstractor/setup.rb +24 -0
- data/lib/abstractor/user_interface.rb +40 -0
- data/lib/abstractor/utility.rb +8 -0
- data/lib/abstractor/version.rb +3 -0
- data/lib/generators/abstractor/install/install_generator.rb +118 -0
- data/lib/generators/abstractor/install/templates/controllers/abstractor_abstraction_groups_controller.rb +16 -0
- data/lib/generators/abstractor/install/templates/controllers/abstractor_abstractions_controller.rb +24 -0
- data/lib/generators/abstractor/install/templates/controllers/abstractor_suggestions_controller.rb +12 -0
- data/lib/generators/abstractor/install/templates/dictionaries.yml +235 -0
- data/lib/generators/abstractor/install/templates/helpers/abstractions_helper.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_group_member.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_object_value.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_predicate_variant.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_schema_relation.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_abstraction_source.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_object_type.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_object_value.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_object_value_variant.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_relation_type.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_rule_type.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_subject.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_subject_group.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_subject_group_member.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_subject_relation.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_suggestion.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_object_value.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_source.rb +9 -0
- data/lib/generators/abstractor/install/templates/models/abstractor_suggestion_status.rb +9 -0
- data/lib/lingscope/build.xml +74 -0
- data/lib/lingscope/build/built-jar.properties +4 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/AbnerTokenizer.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/AnnotationComparer.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/Annotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/BaselineAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/BaselineCueAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/BaselineScopeAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/CrfAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/NegexAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/NegexCueAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/NegexScopeAnnotator.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/PosTagger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/negex/GenNegEx.class +0 -0
- data/lib/lingscope/build/classes/lingscope/algorithms/negex/Sorter.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/AnnotatedFilesMerger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/AnnotationComparerDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/BaselineDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/CrfDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/CueAndPosFilesMerger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/ModelTrainer.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/NegexDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/PosTaggerDriver.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/SentencePosTagger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/drivers/SentenceTagger.class +0 -0
- data/lib/lingscope/build/classes/lingscope/io/AnnotatedSentencesIO.class +0 -0
- data/lib/lingscope/build/classes/lingscope/structures/AnnotatedSentence.class +0 -0
- data/lib/lingscope/dist/README.TXT +32 -0
- data/lib/lingscope/dist/javadoc/allclasses-frame.html +80 -0
- data/lib/lingscope/dist/javadoc/allclasses-noframe.html +80 -0
- data/lib/lingscope/dist/javadoc/constant-values.html +199 -0
- data/lib/lingscope/dist/javadoc/deprecated-list.html +147 -0
- data/lib/lingscope/dist/javadoc/help-doc.html +224 -0
- data/lib/lingscope/dist/javadoc/index-files/index-1.html +188 -0
- data/lib/lingscope/dist/javadoc/index-files/index-10.html +149 -0
- data/lib/lingscope/dist/javadoc/index-files/index-11.html +158 -0
- data/lib/lingscope/dist/javadoc/index-files/index-12.html +157 -0
- data/lib/lingscope/dist/javadoc/index-files/index-13.html +177 -0
- data/lib/lingscope/dist/javadoc/index-files/index-14.html +155 -0
- data/lib/lingscope/dist/javadoc/index-files/index-15.html +152 -0
- data/lib/lingscope/dist/javadoc/index-files/index-16.html +146 -0
- data/lib/lingscope/dist/javadoc/index-files/index-2.html +158 -0
- data/lib/lingscope/dist/javadoc/index-files/index-3.html +165 -0
- data/lib/lingscope/dist/javadoc/index-files/index-4.html +146 -0
- data/lib/lingscope/dist/javadoc/index-files/index-5.html +219 -0
- data/lib/lingscope/dist/javadoc/index-files/index-6.html +149 -0
- data/lib/lingscope/dist/javadoc/index-files/index-7.html +155 -0
- data/lib/lingscope/dist/javadoc/index-files/index-8.html +185 -0
- data/lib/lingscope/dist/javadoc/index-files/index-9.html +164 -0
- data/lib/lingscope/dist/javadoc/index.html +74 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/AbnerTokenizer.html +280 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/AnnotationComparer.html +526 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/Annotator.html +401 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineAnnotator.html +375 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineCueAnnotator.html +309 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/BaselineScopeAnnotator.html +340 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/CrfAnnotator.html +340 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexAnnotator.html +364 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexCueAnnotator.html +309 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/NegexScopeAnnotator.html +309 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/PosTagger.html +268 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AbnerTokenizer.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/AnnotationComparer.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/Annotator.html +299 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineAnnotator.html +189 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineCueAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/BaselineScopeAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/CrfAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexAnnotator.html +189 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexCueAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/NegexScopeAnnotator.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/class-use/PosTagger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/GenNegEx.html +369 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/Sorter.html +253 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/GenNegEx.html +181 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/class-use/Sorter.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-frame.html +35 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-summary.html +165 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-tree.html +154 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/negex/package-use.html +174 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-frame.html +53 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-summary.html +198 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-tree.html +160 -0
- data/lib/lingscope/dist/javadoc/lingscope/algorithms/package-use.html +202 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotatedFilesMerger.html +284 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/AnnotationComparerDriver.html +258 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/BaselineDriver.html +260 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/CrfDriver.html +262 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/CueAndPosFilesMerger.html +310 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/ModelTrainer.html +281 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/NegexDriver.html +262 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/PosTaggerDriver.html +289 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/SentencePosTagger.html +313 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/SentenceTagger.html +512 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotatedFilesMerger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/AnnotationComparerDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/BaselineDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CrfDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/CueAndPosFilesMerger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/ModelTrainer.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/NegexDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/PosTaggerDriver.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentencePosTagger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/class-use/SentenceTagger.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/package-frame.html +51 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/package-summary.html +195 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/package-tree.html +154 -0
- data/lib/lingscope/dist/javadoc/lingscope/drivers/package-use.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/AnnotatedSentencesIO.html +285 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/class-use/AnnotatedSentencesIO.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/package-frame.html +33 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/package-summary.html +158 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/package-tree.html +154 -0
- data/lib/lingscope/dist/javadoc/lingscope/io/package-use.html +145 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/AnnotatedSentence.html +425 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/class-use/AnnotatedSentence.html +423 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/package-frame.html +33 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/package-summary.html +158 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/package-tree.html +154 -0
- data/lib/lingscope/dist/javadoc/lingscope/structures/package-use.html +209 -0
- data/lib/lingscope/dist/javadoc/overview-frame.html +51 -0
- data/lib/lingscope/dist/javadoc/overview-summary.html +168 -0
- data/lib/lingscope/dist/javadoc/overview-tree.html +159 -0
- data/lib/lingscope/dist/javadoc/package-list +5 -0
- data/lib/lingscope/dist/javadoc/resources/inherit.gif +0 -0
- data/lib/lingscope/dist/javadoc/stylesheet.css +29 -0
- data/lib/lingscope/dist/lib/abner.jar +0 -0
- data/lib/lingscope/dist/lib/commons-lang-2.4.jar +0 -0
- data/lib/lingscope/dist/lib/java_utils.jar +0 -0
- data/lib/lingscope/dist/lib/sptoolkit.jar +0 -0
- data/lib/lingscope/dist/lib/stanford-postagger.jar +0 -0
- data/lib/lingscope/dist/lingscope.jar +0 -0
- data/lib/lingscope/lingscope.zip +0 -0
- data/lib/lingscope/manifest.mf +3 -0
- data/lib/lingscope/nbproject/build-impl.xml +1338 -0
- data/lib/lingscope/nbproject/genfiles.properties +8 -0
- data/lib/lingscope/nbproject/private/config.properties +0 -0
- data/lib/lingscope/nbproject/private/private.properties +10 -0
- data/lib/lingscope/nbproject/private/private.xml +5 -0
- data/lib/lingscope/nbproject/project.properties +85 -0
- data/lib/lingscope/nbproject/project.xml +15 -0
- data/lib/lingscope/negation_models.zip +0 -0
- data/lib/lingscope/negation_models/baseline_cue_all_both.model +49 -0
- data/lib/lingscope/negation_models/baseline_cue_all_clinical.model +19 -0
- data/lib/lingscope/negation_models/crf_cue_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_cue_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_cue_crf_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_cue_crf_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_cue_regex_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_cue_regex_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_crf_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_crf_all_clinical.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_regex_all_both.model +0 -0
- data/lib/lingscope/negation_models/crf_scope_words_regex_all_clinical.model +0 -0
- data/lib/lingscope/src/lingscope/algorithms/AbnerTokenizer.java +58 -0
- data/lib/lingscope/src/lingscope/algorithms/AnnotationComparer.java +207 -0
- data/lib/lingscope/src/lingscope/algorithms/Annotator.java +51 -0
- data/lib/lingscope/src/lingscope/algorithms/BaselineAnnotator.java +80 -0
- data/lib/lingscope/src/lingscope/algorithms/BaselineCueAnnotator.java +84 -0
- data/lib/lingscope/src/lingscope/algorithms/BaselineScopeAnnotator.java +101 -0
- data/lib/lingscope/src/lingscope/algorithms/CrfAnnotator.java +45 -0
- data/lib/lingscope/src/lingscope/algorithms/NegexAnnotator.java +52 -0
- data/lib/lingscope/src/lingscope/algorithms/NegexCueAnnotator.java +26 -0
- data/lib/lingscope/src/lingscope/algorithms/NegexScopeAnnotator.java +26 -0
- data/lib/lingscope/src/lingscope/algorithms/PosTagger.java +54 -0
- data/lib/lingscope/src/lingscope/algorithms/negex/GenNegEx.java +530 -0
- data/lib/lingscope/src/lingscope/algorithms/negex/Sorter.java +36 -0
- data/lib/lingscope/src/lingscope/drivers/AnnotatedFilesMerger.java +61 -0
- data/lib/lingscope/src/lingscope/drivers/AnnotationComparerDriver.java +22 -0
- data/lib/lingscope/src/lingscope/drivers/BaselineDriver.java +45 -0
- data/lib/lingscope/src/lingscope/drivers/CrfDriver.java +31 -0
- data/lib/lingscope/src/lingscope/drivers/CueAndPosFilesMerger.java +86 -0
- data/lib/lingscope/src/lingscope/drivers/ModelTrainer.java +39 -0
- data/lib/lingscope/src/lingscope/drivers/NegexDriver.java +32 -0
- data/lib/lingscope/src/lingscope/drivers/PosTaggerDriver.java +62 -0
- data/lib/lingscope/src/lingscope/drivers/SentencePosTagger.java +89 -0
- data/lib/lingscope/src/lingscope/drivers/SentenceTagger.java +158 -0
- data/lib/lingscope/src/lingscope/io/AnnotatedSentencesIO.java +53 -0
- data/lib/lingscope/src/lingscope/structures/AnnotatedSentence.java +105 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.csv +1 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/Morphenglish.txt +2273 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.csv +1 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/Topoenglish.txt +1342 -0
- data/lib/setup/data/ICD-O-3_CSV-metadata/icd-o3 readme.txt +21 -0
- data/lib/setup/data/custom_site_synonyms.csv +1 -0
- data/lib/setup/data/diagnosis_categorizations.csv +1 -0
- data/lib/setup/data/high_level_primary_cns_diagnosis_categorizations.yml +597 -0
- data/lib/setup/data/icdo3_sites_with_laterality.txt +11 -0
- data/lib/setup/data/site_site_categories.txt +28 -0
- data/lib/tasks/abstractor_tasks.rake +22 -0
- data/lib/tasks/cucumber.rake +65 -0
- metadata +754 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
package lingscope.algorithms.negex;
|
2
|
+
|
3
|
+
import java.util.*;
|
4
|
+
|
5
|
+
// Utility class to sort the negation rules by length in descending order.
|
6
|
+
// Rules need to be matched by longest first because there is overlap between the
|
7
|
+
// RegEx of the rules.
|
8
|
+
//
|
9
|
+
// Author: Imre Solti
|
10
|
+
// solti@u.washington.edu
|
11
|
+
// Date: 10/20/2008
|
12
|
+
public class Sorter {

    /**
     * Sorts the negation rules in place into descending order of their
     * trimmed length, so that the longest rules are matched first.
     *
     * <p>The sort is stable: rules whose trimmed lengths are equal keep the
     * relative order they had in the input list.
     *
     * <p>Sorting is best-effort. If the list cannot be sorted (for example it
     * is unmodifiable or contains a {@code null} element), the exception is
     * printed to standard output and the list is returned as it is at that
     * point.
     *
     * @param unsortedRules the rules to sort; modified in place. A
     *        {@code null} list is returned unchanged.
     * @return the same list instance that was passed in, now sorted
     */
    public List<String> sortRules(List<String> unsortedRules) {
        if (unsortedRules == null) {
            return null;
        }
        try {
            // Sorting into descending order by length of the trimmed string.
            Collections.sort(unsortedRules, new Comparator<String>() {
                @Override
                public int compare(String left, String right) {
                    int leftLength = left.trim().length();
                    int rightLength = right.trim().length();
                    if (leftLength > rightLength) {
                        return -1;
                    }
                    return (leftLength < rightLength) ? 1 : 0;
                }
            });
        } catch (RuntimeException e) {
            // Keep the original best-effort contract: report and carry on.
            System.out.println(e);
        }
        return unsortedRules;
    }
}
|
@@ -0,0 +1,61 @@
|
|
1
|
+
package lingscope.drivers;
|
2
|
+
|
3
|
+
import java.util.ArrayList;
|
4
|
+
import java.util.List;
|
5
|
+
import lingscope.io.AnnotatedSentencesIO;
|
6
|
+
import lingscope.structures.AnnotatedSentence;
|
7
|
+
|
8
|
+
/**
|
9
|
+
* Merges two annotated files. Useful to merge a words scope file with a POS cue
|
10
|
+
* file
|
11
|
+
* @author shashank
|
12
|
+
*/
|
13
|
+
public class AnnotatedFilesMerger {
|
14
|
+
|
15
|
+
/**
|
16
|
+
* Merges the given wordsSentence and the given tagsSentence
|
17
|
+
* @param wordsSentence
|
18
|
+
* @param tagsSentence
|
19
|
+
* @return
|
20
|
+
*/
|
21
|
+
public static AnnotatedSentence merge(AnnotatedSentence wordsSentence, AnnotatedSentence tagsSentence) {
|
22
|
+
List<String> words = wordsSentence.getWords();
|
23
|
+
List<String> tags = tagsSentence.getTags();
|
24
|
+
int numTokens = words.size();
|
25
|
+
if (tags.size() != numTokens) {
|
26
|
+
System.err.println("Skipping non-equal length sentences");
|
27
|
+
System.err.println("\tSentence 1: " + wordsSentence.getRawText());
|
28
|
+
System.err.println("\tSentence 2: " + tagsSentence.getRawText());
|
29
|
+
return null;
|
30
|
+
}
|
31
|
+
StringBuilder mergedSentence = new StringBuilder();
|
32
|
+
for (int j = 0; j < numTokens; ++j) {
|
33
|
+
mergedSentence.append(" ").append(words.get(j)).append("|").append(tags.get(j));
|
34
|
+
}
|
35
|
+
return new AnnotatedSentence(mergedSentence.substring(1));
|
36
|
+
}
|
37
|
+
|
38
|
+
/**
|
39
|
+
*
|
40
|
+
* @param args
|
41
|
+
* 0 - file 1: the file from which words will be taken
|
42
|
+
* 1 - file 2: the file from which tags will be taken
|
43
|
+
* 2 - output file path
|
44
|
+
*/
|
45
|
+
public static void main(String[] args) {
|
46
|
+
List<AnnotatedSentence> wordsSentences = AnnotatedSentencesIO.read(args[0]);
|
47
|
+
List<AnnotatedSentence> tagsSentences = AnnotatedSentencesIO.read(args[1]);
|
48
|
+
int numSentences = tagsSentences.size();
|
49
|
+
List<AnnotatedSentence> mergedSentences = new ArrayList<AnnotatedSentence>(numSentences);
|
50
|
+
for (int i = 0; i < numSentences; ++i) {
|
51
|
+
AnnotatedSentence wordsSentence = wordsSentences.get(i);
|
52
|
+
AnnotatedSentence tagsSentence = tagsSentences.get(i);
|
53
|
+
AnnotatedSentence mergedSentence = merge(wordsSentence, tagsSentence);
|
54
|
+
if (mergedSentence == null) {
|
55
|
+
continue;
|
56
|
+
}
|
57
|
+
mergedSentences.add(mergedSentence);
|
58
|
+
}
|
59
|
+
AnnotatedSentencesIO.write(args[2], mergedSentences);
|
60
|
+
}
|
61
|
+
}
|
@@ -0,0 +1,22 @@
|
|
1
|
+
package lingscope.drivers;
|
2
|
+
|
3
|
+
import lingscope.algorithms.AnnotationComparer;
|
4
|
+
|
5
|
+
/**
|
6
|
+
* Compares annotations between a gold and test file
|
7
|
+
* @author shashank
|
8
|
+
*/
|
9
|
+
public class AnnotationComparerDriver {
|
10
|
+
|
11
|
+
/**
|
12
|
+
*
|
13
|
+
* @param args
|
14
|
+
* 0 - gold file
|
15
|
+
* 1 - test file
|
16
|
+
*/
|
17
|
+
public static void main(String[] args) {
|
18
|
+
AnnotationComparer comparer = new AnnotationComparer(10);
|
19
|
+
comparer.compareAnnotationFiles(args[0], args[1]);
|
20
|
+
comparer.printStats();
|
21
|
+
}
|
22
|
+
}
|
@@ -0,0 +1,45 @@
|
|
1
|
+
package lingscope.drivers;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import lingscope.algorithms.Annotator;
|
5
|
+
import lingscope.algorithms.BaselineScopeAnnotator;
|
6
|
+
import lingscope.io.AnnotatedSentencesIO;
|
7
|
+
import lingscope.structures.AnnotatedSentence;
|
8
|
+
|
9
|
+
/**
|
10
|
+
*
|
11
|
+
* @author shashank
|
12
|
+
*/
|
13
|
+
public class BaselineDriver {
|
14
|
+
|
15
|
+
/**
|
16
|
+
*
|
17
|
+
* @param args
|
18
|
+
* 0 - Annotator type ("cue" or "scope")
|
19
|
+
* 1 - Serialized annotator file
|
20
|
+
* 2 - Input file
|
21
|
+
* 3 - Output file
|
22
|
+
* 4 - if annotator type is "scope", then should scope be limited by commas
|
23
|
+
* 5 - if annotator type is "scope", then should scope be limited by periods
|
24
|
+
* 6 - (boolean) does the input file contain annotated sentence (true) or
|
25
|
+
* not (false)
|
26
|
+
*/
|
27
|
+
public static void main(String[] args) {
|
28
|
+
Annotator annotator;
|
29
|
+
|
30
|
+
if (args[0].equalsIgnoreCase("scope")) {
|
31
|
+
annotator = new BaselineScopeAnnotator(SentenceTagger.SCOPE_START,
|
32
|
+
SentenceTagger.SCOPE_INTER, SentenceTagger.OTHER,
|
33
|
+
Boolean.parseBoolean(args[4]), Boolean.parseBoolean(args[5]));
|
34
|
+
} else {
|
35
|
+
annotator = SentenceTagger.getAnnotator("baseline", args[0]);
|
36
|
+
}
|
37
|
+
annotator.loadAnnotator(args[1]);
|
38
|
+
|
39
|
+
boolean isAnnotated = Boolean.parseBoolean(args[6]);
|
40
|
+
List<String> inputSentences = SentenceTagger.getStringList(args[2], isAnnotated);
|
41
|
+
List<AnnotatedSentence> outputSentences = SentenceTagger.annotateSentences(annotator, inputSentences, isAnnotated);
|
42
|
+
AnnotatedSentencesIO.write(args[3], outputSentences);
|
43
|
+
|
44
|
+
}
|
45
|
+
}
|
@@ -0,0 +1,31 @@
|
|
1
|
+
package lingscope.drivers;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import lingscope.algorithms.Annotator;
|
5
|
+
import lingscope.io.AnnotatedSentencesIO;
|
6
|
+
import lingscope.structures.AnnotatedSentence;
|
7
|
+
|
8
|
+
/**
|
9
|
+
* Driver to mark scope or cue in a file
|
10
|
+
* @author shashank
|
11
|
+
*/
|
12
|
+
public class CrfDriver {
|
13
|
+
/**
|
14
|
+
*
|
15
|
+
* @param args
|
16
|
+
* 0 - Annotator type ("cue" or "scope")
|
17
|
+
* 1 - Serialized annotator file
|
18
|
+
* 2 - Input file
|
19
|
+
* 3 - Output file
|
20
|
+
* 4 - (boolean) does the input file contain annotated sentence (true) or
|
21
|
+
* not (false)
|
22
|
+
*/
|
23
|
+
public static void main(String[] args) {
|
24
|
+
Annotator annotator = SentenceTagger.getAnnotator("crf", args[0]);
|
25
|
+
annotator.loadAnnotator(args[1]);
|
26
|
+
boolean isAnnotated = Boolean.parseBoolean(args[4]);
|
27
|
+
List<String> inputSentences = SentenceTagger.getStringList(args[2], isAnnotated);
|
28
|
+
List<AnnotatedSentence> outputSentences = SentenceTagger.annotateSentences(annotator, inputSentences, isAnnotated);
|
29
|
+
AnnotatedSentencesIO.write(args[3], outputSentences);
|
30
|
+
}
|
31
|
+
}
|
@@ -0,0 +1,86 @@
|
|
1
|
+
package lingscope.drivers;
|
2
|
+
|
3
|
+
import generalutils.FileOperations;
|
4
|
+
import java.util.ArrayList;
|
5
|
+
import java.util.List;
|
6
|
+
import java.util.logging.Level;
|
7
|
+
import java.util.logging.Logger;
|
8
|
+
import lingscope.io.AnnotatedSentencesIO;
|
9
|
+
import lingscope.structures.AnnotatedSentence;
|
10
|
+
|
11
|
+
/**
|
12
|
+
* Merges two files, one containing pos tags and the other containing
|
13
|
+
* annotated cues
|
14
|
+
* @author shashank
|
15
|
+
*/
|
16
|
+
public class CueAndPosFilesMerger {
|
17
|
+
|
18
|
+
public static AnnotatedSentence merge(AnnotatedSentence cueSentence, String posSentence, boolean replaceTags) {
|
19
|
+
String[] posTags = posSentence.split("\\s+");
|
20
|
+
List<String> crfTags = cueSentence.getTags();
|
21
|
+
List<String> words = cueSentence.getWords();
|
22
|
+
List<Boolean> tagStatusList = cueSentence.getIsAnnotatedTags();
|
23
|
+
StringBuilder mergedSentence = new StringBuilder();
|
24
|
+
int numWords = posTags.length;
|
25
|
+
for (int j = 0; j < numWords; ++j) {
|
26
|
+
mergedSentence.append(" ");
|
27
|
+
String posTag = posTags[j];
|
28
|
+
String word = words.get(j);
|
29
|
+
String crfTag = crfTags.get(j);
|
30
|
+
boolean tagStatus = tagStatusList.get(j);
|
31
|
+
if (tagStatus) {
|
32
|
+
if (replaceTags) {
|
33
|
+
mergedSentence.append("CUE|");
|
34
|
+
} else {
|
35
|
+
mergedSentence.append(word).append("|");
|
36
|
+
}
|
37
|
+
} else {
|
38
|
+
mergedSentence.append(posTag).append("|");
|
39
|
+
}
|
40
|
+
mergedSentence.append(crfTag);
|
41
|
+
}
|
42
|
+
|
43
|
+
AnnotatedSentence mergedAnnotatedSentence = new AnnotatedSentence(mergedSentence.substring(1));
|
44
|
+
return mergedAnnotatedSentence;
|
45
|
+
}
|
46
|
+
|
47
|
+
/**
|
48
|
+
* Merges the cueSentences and posSentences
|
49
|
+
* @param cueSentences
|
50
|
+
* @param posSentences
|
51
|
+
* @param replaceTags
|
52
|
+
* @return
|
53
|
+
*/
|
54
|
+
public static List<AnnotatedSentence> merge(List<AnnotatedSentence> cueSentences, List<String> posSentences, boolean replaceTags) {
|
55
|
+
List<AnnotatedSentence> mergedSentences = new ArrayList<AnnotatedSentence>(cueSentences.size());
|
56
|
+
int numSentences = posSentences.size();
|
57
|
+
for (int i = 0; i < numSentences; ++i) {
|
58
|
+
AnnotatedSentence cueSentence = cueSentences.get(i);
|
59
|
+
String posSentence = posSentences.get(i);
|
60
|
+
AnnotatedSentence mergedAnnotatedSentence = merge(cueSentence, posSentence, replaceTags);
|
61
|
+
mergedSentences.add(mergedAnnotatedSentence);
|
62
|
+
}
|
63
|
+
return mergedSentences;
|
64
|
+
}
|
65
|
+
|
66
|
+
/**
|
67
|
+
*
|
68
|
+
* @param args
|
69
|
+
* 0 - cue input file
|
70
|
+
* 1 - pos input file
|
71
|
+
* 2 - replace cue with custom tag 'CUE' (true) or leave it as it is (false)
|
72
|
+
* 3 - merged file output path
|
73
|
+
*/
|
74
|
+
public static void main(String[] args) {
|
75
|
+
boolean replaceTags = Boolean.parseBoolean(args[2]);
|
76
|
+
List<AnnotatedSentence> cueSentences = AnnotatedSentencesIO.read(args[0]);
|
77
|
+
List<String> posSentences = null;
|
78
|
+
try {
|
79
|
+
posSentences = FileOperations.readFile(args[1]);
|
80
|
+
} catch (Exception ex) {
|
81
|
+
Logger.getLogger(CueAndPosFilesMerger.class.getName()).log(Level.SEVERE, null, ex);
|
82
|
+
System.exit(1);
|
83
|
+
}
|
84
|
+
AnnotatedSentencesIO.write(args[3], merge(cueSentences, posSentences, replaceTags));
|
85
|
+
}
|
86
|
+
}
|
@@ -0,0 +1,39 @@
|
|
1
|
+
package lingscope.drivers;
|
2
|
+
|
3
|
+
import lingscope.algorithms.Annotator;
|
4
|
+
|
5
|
+
/**
|
6
|
+
* Driver to train a model file. The training data will have to be provided.
|
7
|
+
* @author shashank
|
8
|
+
*/
|
9
|
+
public class ModelTrainer {
|
10
|
+
|
11
|
+
/**
|
12
|
+
* Prints the usage for the model trainer
|
13
|
+
*/
|
14
|
+
public static void usage() {
|
15
|
+
System.out.println("Usage:\njava lingscope.drivers.ModelTrainer (cue|scope) (crf|baseline|negex) training_data_file file_where_model_will_be_saved");
|
16
|
+
}
|
17
|
+
|
18
|
+
/**
|
19
|
+
*
|
20
|
+
* @param args
|
21
|
+
* 0 - Annotator type ("cue" or "scope")
|
22
|
+
* 1 - Model type ("crf", "baseline" or "negex")
|
23
|
+
* 2 - File from which training data will be read
|
24
|
+
* 2 - File where the model will be saved
|
25
|
+
*/
|
26
|
+
public static void main(String[] args) {
|
27
|
+
if (args.length != 4) {
|
28
|
+
usage();
|
29
|
+
System.exit(0);
|
30
|
+
}
|
31
|
+
Annotator annotator = SentenceTagger.getAnnotator(args[1], args[0]);
|
32
|
+
if (annotator == null) {
|
33
|
+
usage();
|
34
|
+
System.exit(1);
|
35
|
+
}
|
36
|
+
annotator.serializeAnnotator(args[2], args[3]);
|
37
|
+
}
|
38
|
+
|
39
|
+
}
|
@@ -0,0 +1,32 @@
|
|
1
|
+
package lingscope.drivers;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import lingscope.algorithms.Annotator;
|
5
|
+
import lingscope.io.AnnotatedSentencesIO;
|
6
|
+
import lingscope.structures.AnnotatedSentence;
|
7
|
+
|
8
|
+
/**
|
9
|
+
* Runs the annotators
|
10
|
+
* @author shashank
|
11
|
+
*/
|
12
|
+
public class NegexDriver {
|
13
|
+
|
14
|
+
/**
|
15
|
+
*
|
16
|
+
* @param args
|
17
|
+
* 0 - Annotator type ("cue" or "scope")
|
18
|
+
* 1 - Serialized annotator file
|
19
|
+
* 2 - Input file
|
20
|
+
* 3 - Output file
|
21
|
+
* 4 - (boolean) does the input file contain annotated sentence (true) or
|
22
|
+
* not (false)
|
23
|
+
*/
|
24
|
+
public static void main(String[] args) {
|
25
|
+
Annotator annotator = SentenceTagger.getAnnotator("negex", args[0]);
|
26
|
+
annotator.loadAnnotator(args[1]);
|
27
|
+
boolean isAnnotated = Boolean.parseBoolean(args[4]);
|
28
|
+
List<String> inputSentences = SentenceTagger.getStringList(args[2], isAnnotated);
|
29
|
+
List<AnnotatedSentence> outputSentences = SentenceTagger.annotateSentences(annotator, inputSentences, isAnnotated);
|
30
|
+
AnnotatedSentencesIO.write(args[3], outputSentences);
|
31
|
+
}
|
32
|
+
}
|
@@ -0,0 +1,62 @@
|
|
1
|
+
package lingscope.drivers;
|
2
|
+
|
3
|
+
import generalutils.FileOperations;
|
4
|
+
import java.util.ArrayList;
|
5
|
+
import java.util.List;
|
6
|
+
import java.util.logging.Level;
|
7
|
+
import java.util.logging.Logger;
|
8
|
+
import lingscope.algorithms.PosTagger;
|
9
|
+
|
10
|
+
/**
|
11
|
+
* Driver for the Part of Speech tagger. Use this to tag all sentences in a
|
12
|
+
* given file with part of speech tags
|
13
|
+
* @author shashank
|
14
|
+
*/
|
15
|
+
public class PosTaggerDriver {
|
16
|
+
|
17
|
+
private static PosTagger posTagger = null;
|
18
|
+
|
19
|
+
/**
|
20
|
+
* Gets the equivalent POS sentence for the given sentenceToTag
|
21
|
+
* @param grammerFile file containing POS model
|
22
|
+
* @param sentenceToTag sentence for which POS tags will be returned
|
23
|
+
* @param is
|
24
|
+
* @return a string where the words from sentenceToTag are replaced with
|
25
|
+
* corresponding part of speech tags
|
26
|
+
*/
|
27
|
+
public static String getTaggedSentence(String grammerFile, String sentenceToTag, boolean isTokenized) {
|
28
|
+
if (posTagger == null) {
|
29
|
+
posTagger = new PosTagger(grammerFile);
|
30
|
+
}
|
31
|
+
List<String> posTags = posTagger.replaceWordsWithPos(sentenceToTag, isTokenized);
|
32
|
+
StringBuilder posSentence = new StringBuilder();
|
33
|
+
for (String posTag : posTags) {
|
34
|
+
posSentence.append(" ").append(posTag);
|
35
|
+
}
|
36
|
+
return posSentence.substring(1);
|
37
|
+
}
|
38
|
+
|
39
|
+
/**
|
40
|
+
*
|
41
|
+
* @param args
|
42
|
+
* 0 - file containing the part of speech model
|
43
|
+
* 1 - input file
|
44
|
+
* 2 - output file
|
45
|
+
* 3 - (boolean) does the input file contain annotated sentence (true) or
|
46
|
+
* not (false)
|
47
|
+
*/
|
48
|
+
public static void main(String[] args) {
|
49
|
+
String grammarFile = args[0];
|
50
|
+
List<String> inputSentences = SentenceTagger.getStringList(args[1], Boolean.parseBoolean(args[3]));
|
51
|
+
List<String> outputSentences = new ArrayList<String>(inputSentences.size());
|
52
|
+
for (String inputSentence : inputSentences) {
|
53
|
+
String outputSentence = getTaggedSentence(grammarFile, inputSentence, Boolean.parseBoolean(args[3]));
|
54
|
+
outputSentences.add(outputSentence);
|
55
|
+
}
|
56
|
+
try {
|
57
|
+
FileOperations.writeFile(args[2], outputSentences);
|
58
|
+
} catch (Exception ex) {
|
59
|
+
Logger.getLogger(PosTaggerDriver.class.getName()).log(Level.SEVERE, null, ex);
|
60
|
+
}
|
61
|
+
}
|
62
|
+
}
|
@@ -0,0 +1,89 @@
|
|
1
|
+
package lingscope.drivers;
|
2
|
+
|
3
|
+
import generalutils.FileOperations;
|
4
|
+
import java.util.List;
|
5
|
+
import lingscope.algorithms.Annotator;
|
6
|
+
import lingscope.structures.AnnotatedSentence;
|
7
|
+
|
8
|
+
/**
|
9
|
+
* Use this sentence tagger when using a model that tags POS
|
10
|
+
* @author shashank
|
11
|
+
*/
|
12
|
+
public class SentencePosTagger {
|
13
|
+
|
14
|
+
public static void usage() {
|
15
|
+
System.out.println("java lingscope.drivers.SentencePosTagger cue_tagging_model "
|
16
|
+
+ "cue_tagger_type(baseline|crf|negex) "
|
17
|
+
+ "replace_cue_with_custom_tag(true|false) scope_tagging_model "
|
18
|
+
+ "pos_model_file sentence_to_tag");
|
19
|
+
System.out.println("\tSaved model for negation can be obtained from http://negscope.askhermes.org/");
|
20
|
+
System.out.println("\tSaved model for speculation can be obtained from http://hedgescope.askhermes.org/");
|
21
|
+
System.out.println("\tSaved model for NegEx can be obtained from http://code.google.com/p/negex/downloads/list");
|
22
|
+
System.out.println("\tSaved pos_model_file can be obtained from http://hedgescope.askhermes.org/");
|
23
|
+
}
|
24
|
+
|
25
|
+
/**
|
26
|
+
*
|
27
|
+
* @param args
|
28
|
+
* 0 - cue tagging model
|
29
|
+
* 1 - cue tagger type (baseline, crf or negex)
|
30
|
+
* 2 - replace cue words with custom tag CUE (true) or not (false)
|
31
|
+
* 3 - crf pos-based scope tagging model
|
32
|
+
* 4 - POS model file
|
33
|
+
* 5 - sentence to tag
|
34
|
+
*/
|
35
|
+
public static void main(String[] args) {
|
36
|
+
if (args[0].equalsIgnoreCase("help")) {
|
37
|
+
usage();
|
38
|
+
System.exit(0);
|
39
|
+
} else if (args.length < 6) {
|
40
|
+
usage();
|
41
|
+
System.exit(1);
|
42
|
+
}
|
43
|
+
Annotator cueAnnotator = SentenceTagger.getAnnotator(args[1], "cue");
|
44
|
+
cueAnnotator.loadAnnotator(args[0]);
|
45
|
+
Annotator scopeAnnotator = SentenceTagger.getAnnotator("crf", "scope");
|
46
|
+
scopeAnnotator.loadAnnotator(args[3]);
|
47
|
+
String sentence = args[5];
|
48
|
+
String grammarFile = args[4];
|
49
|
+
|
50
|
+
if ("file".equalsIgnoreCase(sentence)) {
|
51
|
+
String sentencesFile = args[6];
|
52
|
+
try {
|
53
|
+
List<String> sentences = FileOperations.readFile(sentencesFile);
|
54
|
+
for (String sentenceText : sentences) {
|
55
|
+
tagSentence(sentenceText, grammarFile,
|
56
|
+
Boolean.parseBoolean(args[2]), cueAnnotator,
|
57
|
+
scopeAnnotator);
|
58
|
+
}
|
59
|
+
} catch (Exception ex) {
|
60
|
+
ex.printStackTrace(System.err);
|
61
|
+
}
|
62
|
+
} else {
|
63
|
+
tagSentence(sentence, grammarFile, Boolean.parseBoolean(args[2]),
|
64
|
+
cueAnnotator, scopeAnnotator);
|
65
|
+
}
|
66
|
+
}
|
67
|
+
|
68
|
+
/**
|
69
|
+
* Tags the given sentence
|
70
|
+
* @param sentence the text of the sentence to tag
|
71
|
+
* @param grammarFile path to the Stanford part of speech model file
|
72
|
+
* @param replaceCueWords if true, cue words will be replaced with custom
|
73
|
+
* tag 'CUE'
|
74
|
+
* @param cueAnnotator the {@link Annotator} object to identify negation or
|
75
|
+
* hedge cue in the sentence
|
76
|
+
* @param scopeAnnotator the {@link Annotator} object to identify negation
|
77
|
+
* or hedge scope in the sentence
|
78
|
+
*/
|
79
|
+
public static void tagSentence(String sentence, String grammarFile,
|
80
|
+
boolean replaceCueWords, Annotator cueAnnotator, Annotator scopeAnnotator) {
|
81
|
+
String posSentence = PosTaggerDriver.getTaggedSentence(grammarFile, sentence, false);
|
82
|
+
AnnotatedSentence cueTaggedSentence = cueAnnotator.annotateSentence(sentence, false);
|
83
|
+
AnnotatedSentence posCueMerged = CueAndPosFilesMerger.merge(cueTaggedSentence, posSentence, replaceCueWords);
|
84
|
+
AnnotatedSentence scopeMarkedSentence = scopeAnnotator.annotateSentence(posCueMerged.getSentenceText(), true);
|
85
|
+
AnnotatedSentence scopeWordsMarkedSentence = AnnotatedFilesMerger.merge(cueTaggedSentence, scopeMarkedSentence);
|
86
|
+
System.out.println(scopeWordsMarkedSentence.getRawText());
|
87
|
+
|
88
|
+
}
|
89
|
+
}
|