de.oddb 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Guide.txt +3 -0
- data/History.txt +5 -0
- data/LICENCE.txt +339 -0
- data/Manifest.txt +430 -0
- data/README +423 -0
- data/README.txt +25 -0
- data/Rakefile +28 -0
- data/bin/admin +71 -0
- data/bin/exportd +44 -0
- data/bin/oddbd +33 -0
- data/data/fulltext/data/dicts/french/fulltext.aff +1057 -0
- data/data/fulltext/data/dicts/french/fulltext.dict +91189 -0
- data/data/fulltext/data/dicts/french/fulltext.stop +135 -0
- data/data/fulltext/data/dicts/german/fulltext.aff +1233 -0
- data/data/fulltext/data/dicts/german/fulltext.dict +287574 -0
- data/data/fulltext/data/dicts/german/fulltext.stop +133 -0
- data/data/fulltext/data/german_compound/README +15 -0
- data/data/fulltext/data/german_compound/compound.pl +63 -0
- data/data/fulltext/data/german_compound/german.stop +20 -0
- data/data/fulltext/data/ispell-german-compound.tar.gz +0 -0
- data/data/fulltext/redist/dict_french/Makefile +12 -0
- data/data/fulltext/redist/dict_french/README.french +1 -0
- data/data/fulltext/redist/dict_french/dict_french.sql.in +7 -0
- data/data/fulltext/redist/dict_french/dict_snowball.c +56 -0
- data/data/fulltext/redist/dict_french/french_stem.c +1222 -0
- data/data/fulltext/redist/dict_french/french_stem.h +16 -0
- data/data/fulltext/redist/dict_french/subinclude.h +2 -0
- data/data/fulltext/redist/dict_german/Makefile +12 -0
- data/data/fulltext/redist/dict_german/README.german +1 -0
- data/data/fulltext/redist/dict_german/dict_german.sql.in +7 -0
- data/data/fulltext/redist/dict_german/dict_snowball.c +56 -0
- data/data/fulltext/redist/dict_german/german_stem.c +527 -0
- data/data/fulltext/redist/dict_german/german_stem.h +16 -0
- data/data/fulltext/redist/dict_german/subinclude.h +1 -0
- data/data/fulltext/redist/french_stem.c +1222 -0
- data/data/fulltext/redist/french_stem.h +16 -0
- data/data/fulltext/redist/german_stem.c +527 -0
- data/data/fulltext/redist/german_stem.h +16 -0
- data/jobs/export_chde_xls +20 -0
- data/jobs/export_csv +20 -0
- data/jobs/export_fachinfo_yaml +20 -0
- data/jobs/export_patinfo_yaml +20 -0
- data/jobs/export_yaml +20 -0
- data/jobs/import_dimdi +15 -0
- data/jobs/import_gkv +19 -0
- data/jobs/import_pharma24 +15 -0
- data/jobs/import_pharmnet +30 -0
- data/jobs/import_whocc +18 -0
- data/lib/fixes/singular.rb +9 -0
- data/lib/fixes/yaml.rb +13 -0
- data/lib/oddb.rb +13 -0
- data/lib/oddb/business/company.rb +18 -0
- data/lib/oddb/business/grant_download.rb +27 -0
- data/lib/oddb/business/invoice.rb +75 -0
- data/lib/oddb/config.rb +112 -0
- data/lib/oddb/currency.rb +6 -0
- data/lib/oddb/drugs.rb +16 -0
- data/lib/oddb/drugs/active_agent.rb +37 -0
- data/lib/oddb/drugs/atc.rb +53 -0
- data/lib/oddb/drugs/composition.rb +41 -0
- data/lib/oddb/drugs/ddd.rb +24 -0
- data/lib/oddb/drugs/dose.rb +107 -0
- data/lib/oddb/drugs/galenic_form.rb +21 -0
- data/lib/oddb/drugs/galenic_group.rb +17 -0
- data/lib/oddb/drugs/package.rb +111 -0
- data/lib/oddb/drugs/part.rb +55 -0
- data/lib/oddb/drugs/product.rb +25 -0
- data/lib/oddb/drugs/sequence.rb +68 -0
- data/lib/oddb/drugs/substance.rb +31 -0
- data/lib/oddb/drugs/substance_group.rb +13 -0
- data/lib/oddb/drugs/unit.rb +12 -0
- data/lib/oddb/export.rb +4 -0
- data/lib/oddb/export/csv.rb +94 -0
- data/lib/oddb/export/l10n_sessions.rb +30 -0
- data/lib/oddb/export/rss.rb +44 -0
- data/lib/oddb/export/server.rb +137 -0
- data/lib/oddb/export/xls.rb +127 -0
- data/lib/oddb/export/yaml.rb +212 -0
- data/lib/oddb/html/state/download.rb +13 -0
- data/lib/oddb/html/state/drugs/admin/package.rb +190 -0
- data/lib/oddb/html/state/drugs/admin/product.rb +56 -0
- data/lib/oddb/html/state/drugs/admin/sequence.rb +253 -0
- data/lib/oddb/html/state/drugs/ajax/explain_ddd_price.rb +19 -0
- data/lib/oddb/html/state/drugs/ajax/explain_price.rb +19 -0
- data/lib/oddb/html/state/drugs/ajax/global.rb +18 -0
- data/lib/oddb/html/state/drugs/ajax/package_infos.rb +19 -0
- data/lib/oddb/html/state/drugs/ajax/remote_infos.rb +19 -0
- data/lib/oddb/html/state/drugs/atc_browser.rb +39 -0
- data/lib/oddb/html/state/drugs/atc_guidelines.rb +21 -0
- data/lib/oddb/html/state/drugs/compare.rb +52 -0
- data/lib/oddb/html/state/drugs/download_export.rb +18 -0
- data/lib/oddb/html/state/drugs/downloads.rb +42 -0
- data/lib/oddb/html/state/drugs/fachinfo.rb +21 -0
- data/lib/oddb/html/state/drugs/feedback.rb +91 -0
- data/lib/oddb/html/state/drugs/global.rb +270 -0
- data/lib/oddb/html/state/drugs/init.rb +18 -0
- data/lib/oddb/html/state/drugs/login.rb +17 -0
- data/lib/oddb/html/state/drugs/package.rb +32 -0
- data/lib/oddb/html/state/drugs/patinfo.rb +21 -0
- data/lib/oddb/html/state/drugs/products.rb +51 -0
- data/lib/oddb/html/state/drugs/result.rb +125 -0
- data/lib/oddb/html/state/global.rb +206 -0
- data/lib/oddb/html/state/global_predefine.rb +17 -0
- data/lib/oddb/html/state/limit.rb +17 -0
- data/lib/oddb/html/state/login.rb +56 -0
- data/lib/oddb/html/state/paypal/checkout.rb +97 -0
- data/lib/oddb/html/state/paypal/collect.rb +19 -0
- data/lib/oddb/html/state/paypal/download.rb +61 -0
- data/lib/oddb/html/state/paypal/redirect.rb +18 -0
- data/lib/oddb/html/state/register_download.rb +24 -0
- data/lib/oddb/html/state/register_export.rb +38 -0
- data/lib/oddb/html/state/register_poweruser.rb +17 -0
- data/lib/oddb/html/state/viral/admin.rb +79 -0
- data/lib/oddb/html/state/viral/poweruser.rb +16 -0
- data/lib/oddb/html/util/annotated_list.rb +39 -0
- data/lib/oddb/html/util/know_it_all.rb +28 -0
- data/lib/oddb/html/util/known_user.rb +55 -0
- data/lib/oddb/html/util/lookandfeel.rb +698 -0
- data/lib/oddb/html/util/need_all_input.rb +29 -0
- data/lib/oddb/html/util/session.rb +84 -0
- data/lib/oddb/html/util/sort.rb +72 -0
- data/lib/oddb/html/util/unsaved_helper.rb +20 -0
- data/lib/oddb/html/util/validator.rb +59 -0
- data/lib/oddb/html/view/ajax/json.rb +22 -0
- data/lib/oddb/html/view/alpha_header.rb +28 -0
- data/lib/oddb/html/view/document.rb +117 -0
- data/lib/oddb/html/view/download.rb +33 -0
- data/lib/oddb/html/view/drugs/admin/package.rb +245 -0
- data/lib/oddb/html/view/drugs/admin/product.rb +104 -0
- data/lib/oddb/html/view/drugs/admin/sequence.rb +305 -0
- data/lib/oddb/html/view/drugs/ajax/explain_ddd_price.rb +87 -0
- data/lib/oddb/html/view/drugs/ajax/explain_price.rb +61 -0
- data/lib/oddb/html/view/drugs/ajax/package_infos.rb +105 -0
- data/lib/oddb/html/view/drugs/ajax/remote_infos.rb +44 -0
- data/lib/oddb/html/view/drugs/atc_browser.rb +68 -0
- data/lib/oddb/html/view/drugs/atc_guidelines.rb +94 -0
- data/lib/oddb/html/view/drugs/compare.rb +95 -0
- data/lib/oddb/html/view/drugs/download_export.rb +28 -0
- data/lib/oddb/html/view/drugs/downloads.rb +128 -0
- data/lib/oddb/html/view/drugs/fachinfo.rb +46 -0
- data/lib/oddb/html/view/drugs/feedback.rb +235 -0
- data/lib/oddb/html/view/drugs/init.rb +51 -0
- data/lib/oddb/html/view/drugs/legend.rb +24 -0
- data/lib/oddb/html/view/drugs/package.rb +403 -0
- data/lib/oddb/html/view/drugs/patinfo.rb +46 -0
- data/lib/oddb/html/view/drugs/products.rb +97 -0
- data/lib/oddb/html/view/drugs/result.rb +296 -0
- data/lib/oddb/html/view/drugs/search.rb +33 -0
- data/lib/oddb/html/view/drugs/template.rb +15 -0
- data/lib/oddb/html/view/foot.rb +52 -0
- data/lib/oddb/html/view/google.rb +23 -0
- data/lib/oddb/html/view/google_ads.rb +40 -0
- data/lib/oddb/html/view/head.rb +78 -0
- data/lib/oddb/html/view/limit.rb +109 -0
- data/lib/oddb/html/view/list.rb +59 -0
- data/lib/oddb/html/view/login.rb +38 -0
- data/lib/oddb/html/view/navigation.rb +67 -0
- data/lib/oddb/html/view/offset_header.rb +35 -0
- data/lib/oddb/html/view/paypal/collect.rb +95 -0
- data/lib/oddb/html/view/paypal/redirect.rb +51 -0
- data/lib/oddb/html/view/paypal/register_form.rb +149 -0
- data/lib/oddb/html/view/register_download.rb +29 -0
- data/lib/oddb/html/view/register_export.rb +29 -0
- data/lib/oddb/html/view/register_poweruser.rb +29 -0
- data/lib/oddb/html/view/rss/feedback.rb +64 -0
- data/lib/oddb/html/view/rss_preview.rb +61 -0
- data/lib/oddb/html/view/search.rb +104 -0
- data/lib/oddb/html/view/snapback.rb +24 -0
- data/lib/oddb/html/view/template.rb +56 -0
- data/lib/oddb/import/dimdi.rb +583 -0
- data/lib/oddb/import/excel.rb +45 -0
- data/lib/oddb/import/gkv.rb +463 -0
- data/lib/oddb/import/importer.rb +36 -0
- data/lib/oddb/import/pharma24.rb +211 -0
- data/lib/oddb/import/pharmnet.rb +1186 -0
- data/lib/oddb/import/rtf.rb +409 -0
- data/lib/oddb/import/whocc.rb +148 -0
- data/lib/oddb/import/xml.rb +15 -0
- data/lib/oddb/model.rb +179 -0
- data/lib/oddb/persistence.rb +22 -0
- data/lib/oddb/persistence/odba.rb +32 -0
- data/lib/oddb/persistence/odba/business/company.rb +13 -0
- data/lib/oddb/persistence/odba/business/grant_download.rb +14 -0
- data/lib/oddb/persistence/odba/business/invoice.rb +15 -0
- data/lib/oddb/persistence/odba/drugs/atc.rb +15 -0
- data/lib/oddb/persistence/odba/drugs/galenic_form.rb +18 -0
- data/lib/oddb/persistence/odba/drugs/galenic_group.rb +13 -0
- data/lib/oddb/persistence/odba/drugs/package.rb +25 -0
- data/lib/oddb/persistence/odba/drugs/product.rb +13 -0
- data/lib/oddb/persistence/odba/drugs/sequence.rb +21 -0
- data/lib/oddb/persistence/odba/drugs/substance.rb +21 -0
- data/lib/oddb/persistence/odba/drugs/substance_group.rb +13 -0
- data/lib/oddb/persistence/odba/drugs/unit.rb +13 -0
- data/lib/oddb/persistence/odba/export.rb +26 -0
- data/lib/oddb/persistence/odba/model.rb +68 -0
- data/lib/oddb/persistence/odba/text/document.rb +11 -0
- data/lib/oddb/persistence/odba/util/code.rb +11 -0
- data/lib/oddb/persistence/odba/util/m10l_document.rb +13 -0
- data/lib/oddb/persistence/og.rb +16 -0
- data/lib/oddb/persistence/og/drugs/composition.rb +14 -0
- data/lib/oddb/persistence/og/drugs/product.rb +14 -0
- data/lib/oddb/persistence/og/drugs/sequence.rb +15 -0
- data/lib/oddb/persistence/og/model.rb +25 -0
- data/lib/oddb/persistence/og/util/multilingual.rb +13 -0
- data/lib/oddb/redist/rtf_tools/reader.rb +139 -0
- data/lib/oddb/remote/business/company.rb +17 -0
- data/lib/oddb/remote/drugs/active_agent.rb +27 -0
- data/lib/oddb/remote/drugs/atc.rb +31 -0
- data/lib/oddb/remote/drugs/dose.rb +8 -0
- data/lib/oddb/remote/drugs/galenic_form.rb +24 -0
- data/lib/oddb/remote/drugs/package.rb +128 -0
- data/lib/oddb/remote/drugs/part.rb +30 -0
- data/lib/oddb/remote/drugs/substance.rb +20 -0
- data/lib/oddb/remote/drugs/unit.rb +20 -0
- data/lib/oddb/remote/object.rb +36 -0
- data/lib/oddb/text/chapter.rb +23 -0
- data/lib/oddb/text/document.rb +42 -0
- data/lib/oddb/text/format.rb +37 -0
- data/lib/oddb/text/paragraph.rb +53 -0
- data/lib/oddb/text/picture.rb +89 -0
- data/lib/oddb/text/table.rb +68 -0
- data/lib/oddb/util.rb +9 -0
- data/lib/oddb/util/annotated_list.rb +37 -0
- data/lib/oddb/util/code.rb +69 -0
- data/lib/oddb/util/comparison.rb +36 -0
- data/lib/oddb/util/download.rb +17 -0
- data/lib/oddb/util/exporter.rb +8 -0
- data/lib/oddb/util/feedback.rb +23 -0
- data/lib/oddb/util/ipn.rb +53 -0
- data/lib/oddb/util/job.rb +23 -0
- data/lib/oddb/util/logger.rb +20 -0
- data/lib/oddb/util/m10l_document.rb +41 -0
- data/lib/oddb/util/mail.rb +87 -0
- data/lib/oddb/util/money.rb +64 -0
- data/lib/oddb/util/multilingual.rb +70 -0
- data/lib/oddb/util/quanty.rb +3 -0
- data/lib/oddb/util/quanty/fact.rb +242 -0
- data/lib/oddb/util/quanty/main.rb +164 -0
- data/lib/oddb/util/quanty/parse.rb +872 -0
- data/lib/oddb/util/quanty/units.dump +0 -0
- data/lib/oddb/util/server.rb +150 -0
- data/lib/oddb/util/smtp_tls.rb +58 -0
- data/lib/oddb/util/updater.rb +161 -0
- data/lib/oddb/util/ydim.rb +110 -0
- data/lib/oddb/util/yus.rb +46 -0
- data/test/business/test_company.rb +29 -0
- data/test/business/test_grant_download.rb +29 -0
- data/test/drugs/test_active_agent.rb +53 -0
- data/test/drugs/test_atc.rb +54 -0
- data/test/drugs/test_composition.rb +88 -0
- data/test/drugs/test_ddd.rb +22 -0
- data/test/drugs/test_dose.rb +189 -0
- data/test/drugs/test_galenic_form.rb +41 -0
- data/test/drugs/test_package.rb +172 -0
- data/test/drugs/test_part.rb +32 -0
- data/test/drugs/test_product.rb +31 -0
- data/test/drugs/test_sequence.rb +140 -0
- data/test/drugs/test_substance.rb +51 -0
- data/test/drugs/test_substance_group.rb +27 -0
- data/test/export/test_rss.rb +86 -0
- data/test/export/test_server.rb +163 -0
- data/test/export/test_xls.rb +146 -0
- data/test/export/test_yaml.rb +120 -0
- data/test/import/data/csv/products.csv +11 -0
- data/test/import/data/html/dimdi_index.html +400 -0
- data/test/import/data/html/gkv/Befreiungsliste_Arzneimittel_Versicherte.gkvnet +508 -0
- data/test/import/data/html/pharma24/1337397.html +754 -0
- data/test/import/data/html/pharma24/842756.html +570 -0
- data/test/import/data/html/pharma24/ac-page-10.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-11.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-12.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-13.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-14.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-15.html +3011 -0
- data/test/import/data/html/pharma24/ac-page-16.html +3050 -0
- data/test/import/data/html/pharma24/ac-page-17.html +3285 -0
- data/test/import/data/html/pharma24/ac-page-18.html +3109 -0
- data/test/import/data/html/pharma24/ac-page-19.html +3126 -0
- data/test/import/data/html/pharma24/ac-page-2.html +3005 -0
- data/test/import/data/html/pharma24/ac-page-20.html +3007 -0
- data/test/import/data/html/pharma24/ac-page-21.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-22.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-23.html +3055 -0
- data/test/import/data/html/pharma24/ac-page-24.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-25.html +3004 -0
- data/test/import/data/html/pharma24/ac-page-26.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-27.html +3167 -0
- data/test/import/data/html/pharma24/ac-page-28.html +3236 -0
- data/test/import/data/html/pharma24/ac-page-29.html +3110 -0
- data/test/import/data/html/pharma24/ac-page-3.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-30.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-31.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-32.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-33.html +3001 -0
- data/test/import/data/html/pharma24/ac-page-34.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-35.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-36.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-37.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-38.html +3003 -0
- data/test/import/data/html/pharma24/ac-page-39.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-4.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-40.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-41.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-42.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-43.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-44.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-45.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-46.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-47.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-48.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-49.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-5.html +3168 -0
- data/test/import/data/html/pharma24/ac-page-50.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-51.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-52.html +3003 -0
- data/test/import/data/html/pharma24/ac-page-53.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-54.html +3095 -0
- data/test/import/data/html/pharma24/ac-page-55.html +3041 -0
- data/test/import/data/html/pharma24/ac-page-56.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-57.html +3001 -0
- data/test/import/data/html/pharma24/ac-page-58.html +3001 -0
- data/test/import/data/html/pharma24/ac-page-59.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-6.html +3072 -0
- data/test/import/data/html/pharma24/ac-page-60.html +3001 -0
- data/test/import/data/html/pharma24/ac-page-61.html +3005 -0
- data/test/import/data/html/pharma24/ac-page-62.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-63.html +3007 -0
- data/test/import/data/html/pharma24/ac-page-64.html +3007 -0
- data/test/import/data/html/pharma24/ac-page-65.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-66.html +3011 -0
- data/test/import/data/html/pharma24/ac-page-67.html +3026 -0
- data/test/import/data/html/pharma24/ac-page-68.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-69.html +3010 -0
- data/test/import/data/html/pharma24/ac-page-7.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-70.html +3192 -0
- data/test/import/data/html/pharma24/ac-page-71.html +3133 -0
- data/test/import/data/html/pharma24/ac-page-72.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-73.html +3227 -0
- data/test/import/data/html/pharma24/ac-page-74.html +3241 -0
- data/test/import/data/html/pharma24/ac-page-75.html +3227 -0
- data/test/import/data/html/pharma24/ac-page-76.html +3244 -0
- data/test/import/data/html/pharma24/ac-page-77.html +1164 -0
- data/test/import/data/html/pharma24/ac-page-8.html +2999 -0
- data/test/import/data/html/pharma24/ac-page-9.html +2999 -0
- data/test/import/data/html/pharma24/ac.html +2999 -0
- data/test/import/data/html/pharmnet/display.html +662 -0
- data/test/import/data/html/pharmnet/display1.html +625 -0
- data/test/import/data/html/pharmnet/display2.html +625 -0
- data/test/import/data/html/pharmnet/display3.html +625 -0
- data/test/import/data/html/pharmnet/display_tramal.html +634 -0
- data/test/import/data/html/pharmnet/empty_result.html +395 -0
- data/test/import/data/html/pharmnet/gate.html +246 -0
- data/test/import/data/html/pharmnet/index.html +258 -0
- data/test/import/data/html/pharmnet/paged_result_1.html +401 -0
- data/test/import/data/html/pharmnet/paged_result_2.html +401 -0
- data/test/import/data/html/pharmnet/result.html +401 -0
- data/test/import/data/html/pharmnet/search.html +865 -0
- data/test/import/data/html/pharmnet/search_filtered.html +182 -0
- data/test/import/data/html/whocc/A.html +56 -0
- data/test/import/data/html/whocc/A03.html +48 -0
- data/test/import/data/html/whocc/A03AB.html +48 -0
- data/test/import/data/html/whocc/A06AA.html +47 -0
- data/test/import/data/html/whocc/C03.html +47 -0
- data/test/import/data/html/whocc/login.html +77 -0
- data/test/import/data/mail/csv.mail +81 -0
- data/test/import/data/rtf/pharmnet/aarane.pi.rtf +648 -0
- data/test/import/data/rtf/pharmnet/ace_hemmer_ratio.pi.rtf +324 -0
- data/test/import/data/rtf/pharmnet/ace_hemmer_ratio.rtf +4816 -0
- data/test/import/data/rtf/pharmnet/acemetacin.pi.rtf +388 -0
- data/test/import/data/rtf/pharmnet/acemit.pi.rtf +240 -0
- data/test/import/data/rtf/pharmnet/acerbon.pi.rtf +1257 -0
- data/test/import/data/rtf/pharmnet/acetylcystein.pi.rtf +323 -0
- data/test/import/data/rtf/pharmnet/aciclo.pi.rtf +287 -0
- data/test/import/data/rtf/pharmnet/aciclovir.pi.rtf +236 -0
- data/test/import/data/rtf/pharmnet/actrapid.pi.rtf +322 -0
- data/test/import/data/rtf/pharmnet/amlodipin.pi.rtf +452 -0
- data/test/import/data/rtf/pharmnet/amlodipin.rtf +473 -0
- data/test/import/data/rtf/pharmnet/aspirin.pi.rtf +313 -0
- data/test/import/data/rtf/pharmnet/aspirin.rtf +781 -0
- data/test/import/data/rtf/pharmnet/baymycard.pi.rtf +447 -0
- data/test/import/data/rtf/pharmnet/omeprazol.pi.rtf +510 -0
- data/test/import/data/rtf/pharmnet/omeprazol.rtf +9216 -0
- data/test/import/data/rtf/pharmnet/paroxetin.pi.rtf +678 -0
- data/test/import/data/rtf/pharmnet/selegilin.pi.rtf +312 -0
- data/test/import/data/rtf/pharmnet/selegilin.rtf +683 -0
- data/test/import/data/rtf/pharmnet/valium.pi.rtf +387 -0
- data/test/import/data/txt/gkv/gkv_p1.txt +17 -0
- data/test/import/data/xls/darform_010706.xls +0 -0
- data/test/import/data/xls/fb010706.xls +0 -0
- data/test/import/data/xls/liste_zuzahlungsbefreite_arzneimittel_suchfunktion.xls +0 -0
- data/test/import/data/xls/wirkkurz_010406.xls +0 -0
- data/test/import/data/xml/ATC_2006.xml +47 -0
- data/test/import/data/xml/ATC_2006_ddd.xml +35 -0
- data/test/import/test_dimdi.rb +323 -0
- data/test/import/test_excel.rb +31 -0
- data/test/import/test_gkv.rb +260 -0
- data/test/import/test_pharma24.rb +112 -0
- data/test/import/test_pharmnet.rb +980 -0
- data/test/import/test_rtf.rb +37 -0
- data/test/import/test_whocc.rb +314 -0
- data/test/remote/drugs/test_active_agent.rb +36 -0
- data/test/selenium/selenium-server.jar +0 -0
- data/test/selenium/test_atc_browser.rb +121 -0
- data/test/selenium/test_atc_guidelines.rb +95 -0
- data/test/selenium/test_collect.rb +137 -0
- data/test/selenium/test_compare.rb +294 -0
- data/test/selenium/test_fachinfo.rb +128 -0
- data/test/selenium/test_feedback.rb +192 -0
- data/test/selenium/test_init.rb +64 -0
- data/test/selenium/test_limit.rb +304 -0
- data/test/selenium/test_login.rb +67 -0
- data/test/selenium/test_package.rb +516 -0
- data/test/selenium/test_patinfo.rb +128 -0
- data/test/selenium/test_product.rb +80 -0
- data/test/selenium/test_products.rb +141 -0
- data/test/selenium/test_search.rb +933 -0
- data/test/selenium/test_sequence.rb +513 -0
- data/test/selenium/unit.rb +190 -0
- data/test/stub/http_server.rb +144 -0
- data/test/stub/model.rb +173 -0
- data/test/suite.rb +15 -0
- data/test/test_model.rb +83 -0
- data/test/util/test_code.rb +74 -0
- data/test/util/test_ipn.rb +117 -0
- data/test/util/test_mail.rb +85 -0
- data/test/util/test_multilingual.rb +97 -0
- data/test/util/test_server.rb +94 -0
- data/test/util/test_updater.rb +130 -0
- data/test/util/test_ydim.rb +115 -0
- data/test/util/test_yus.rb +79 -0
- metadata +568 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'encoding/character/utf-8'
|
2
|
+
require 'iconv'
|
3
|
+
|
4
|
+
module ODDB
|
5
|
+
module Import
|
6
|
+
# Base class for the importers. Provides the shared latin1 -> utf8
# converter, name normalisation helpers and a +report+ accessor.
class Importer
  # Shared converter used by #utf8 (latin1 input, utf8 output).
  @@iconv = Iconv.new('utf8//IGNORE//TRANSLIT', 'latin1')
  # Words that #capitalize_all writes in lower case.
  @@lower = /^(and|for|in(cl)?|on|plain|with)$/i
  attr_accessor :report

  # Starts with an empty report and a skip-count of one row.
  def initialize
    @report, @skip_rows = [], 1
  end

  # Capitalizes every word of +str+; words matched by @@lower are
  # downcased instead. Non-word separators pass through unchanged.
  def capitalize_all(str)
    ## benchmarked fastest against an append (<<) solution
    pieces = str.split(/\b/).collect do |token|
      if @@lower.match(token)
        token.downcase
      else
        token.capitalize
      end
    end
    pieces.join
  end

  # Normalises a company name: capitalizes the words, puts a space after
  # dots, pads '/' and '&' with spaces and restores the usual spelling of
  # the legal forms GmbH, AG and KG. The result is passed through +u+
  # (not defined in this file - presumably supplied by
  # 'encoding/character/utf-8'; confirm).
  def company_name(cname)
    normalized = capitalize_all(cname.to_s)
    normalized.gsub!(/\.(?!\s)/, '. ')
    normalized.gsub!(/[\/&]/) { |separator| " #{separator} " }
    [
      [/Gmbh/, 'GmbH'],
      [/Ag\b/, 'AG'],
      [/\bKg\b/, 'KG'],
    ].each { |pattern, spelling| normalized.gsub!(pattern, spelling) }
    normalized.strip!
    u(normalized)
  end

  # Hook without behaviour in the base class.
  def postprocess
  end

  # Converts +str+ with the shared iconv instance and wraps the result
  # with +u+. Returns nil when +str+ is nil or false.
  def utf8(str)
    return unless str
    u(@@iconv.iconv(str))
  end
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,211 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Import::Pharma24 -- de.oddb.org -- 21.04.2008 -- hwyss@ywesee.com
|
3
|
+
|
4
|
+
require 'oddb/import/importer'
|
5
|
+
require 'oddb/util/money'
|
6
|
+
|
7
|
+
module ODDB
|
8
|
+
module Import
|
9
|
+
class Pharma24 < Importer
|
10
|
+
# Prepares the counters that #report prints and the base URL of the
# web shop that is queried.
def initialize
  # Run the parent constructor too, so that state it prepares
  # (@report, @skip_rows) also exists on Pharma24 instances.
  super()
  @count = 0              # packages looked up
  @created_companies = 0  # companies created during the run
  @found = 0              # packages for which the shop returned data
  @host = 'http://www.apotheke-online-internet.de'
end
|
16
|
+
# Updates every package in +packages+ and returns the lines of #report.
#
# agent::    HTTP agent; its history is limited to one page
# packages:: array of packages; it is rewritten in place to hold ODBA ids
#            and is empty when the method returns
# opts::     passed through to #update_package
def import(agent, packages, opts={:all => false})
  agent.max_history = 1
  # Work on ids only and re-fetch each package from the ODBA cache right
  # before it is processed (presumably to keep memory usage low - confirm).
  packages.map! { |pkg| pkg.odba_id }
  odba_id = packages.shift
  while odba_id
    update_package(agent, ODBA.cache.fetch(odba_id), opts)
    odba_id = packages.shift
  end
  report
end
|
24
|
+
# Returns the company named by data[:company] (normalised through
# #company_name). An unknown name yields a new, not yet saved
# Business::Company and increments @created_companies.
def import_company(data)
  name = company_name(data[:company])
  known = Business::Company.find_by_name(name)
  return known unless known.nil?
  @created_companies += 1
  created = Business::Company.new
  created.name.de = name
  created
end
|
34
|
+
# Writes the package size found in +data+ to the first part of +package+
# (a part is added when the package has none) and saves that part.
#
# data[:size] is split on 'x' into at most three pieces, read from the
# right as dose, size and multi. For the unit 'St' no quantity is set and
# a two-piece value is read as multi x size; for any other unit the dose
# becomes the part's quantity. data[:unitname], when present, is resolved
# to a Drugs::Unit which is created and saved if it does not exist yet.
def import_size(data, package)
  part = package.parts.first
  part ||= package.add_part(Drugs::Part.new)
  pieces = data[:size].to_s.split(/x/i, 3)
  dose, size, multi = pieces.reverse.compact
  unit_code = data[:unit].to_s
  if unit_code == 'St'
    # piece counts carry no dose: shift the values one position
    multi, size = size, dose if multi.nil?
  else
    part.quantity = Drugs::Dose.new(dose, unit_code)
  end
  multi_count = multi.to_i
  size_count = size.to_i
  # zero (also the result for missing values) is stored as nil
  part.multi = multi_count > 0 ? multi_count : nil
  part.size = size_count > 0 ? size_count : nil
  unitname = data[:unitname]
  if unitname
    unit = Drugs::Unit.find_by_name(unitname)
    unless unit
      unit = Drugs::Unit.new
      unit.name.de = unitname
      unit.save
    end
    part.unit = unit
  end
  part.save
end
|
59
|
+
# Returns, as an Array, the tables below +node+ that contain at least one
# 'h2/a' match.
def interesting_tables node
  tables = node/'table'
  with_heading_link = tables.find_all { |candidate|
    heading_links = candidate/'h2/a'
    !heading_links.empty?
  }
  with_heading_link.to_a
end
|
62
|
+
# Fetches the listing page "<host>/<fst><snd>.html" and every follow-up
# page reachable through its last 'pageResults' link, and returns the
# product data extracted from all of them.
def get_alphabetical agent, fst, snd
  page = agent.get "#@host/#{fst}#{snd}.html"
  products = extract_data(page)
  next_link = (page/'//a[@class="pageResults"]').last
  # NOTE(review): the link label below contains '?' where non-ASCII
  # characters may have been lost - confirm against the live page.
  while next_link && next_link.inner_text == '[n?chste?>>]'
    page = agent.get next_link.attributes['href']
    products.concat extract_data(page)
    next_link = (page/'//a[@class="pageResults"]').last
  end
  products
end
|
73
|
+
# Extracts one Hash per product table found on +page+.
#
# Every Hash has :name and :url (taken from the table's 'h2/a' link) and,
# when the corresponding markup is present, :price_public (Float),
# :code_prescription (true/false), :size, :unit, :unitname and :company.
# A label cell without a following value cell is skipped instead of
# raising NoMethodError.
def extract_data page
  data = []
  ## this should be page/'table[h2/a]'
  #  -> but Nokogiri apparently can't handle that
  all_tables = interesting_tables page
  # A table that wraps another matching table matches as well; collect
  # the nested ones so each product is reported once (by its outer table).
  nested = []
  all_tables.each do |table|
    nested.concat interesting_tables(table)
  end
  (all_tables - nested).each do |table|
    link, = table/'h2/a'
    next unless link
    prod = {
      :name => utf8(link.inner_text),
      :url  => link.attribute('href').to_s,
    }
    if price = (table/:strong).first
      # decimal comma -> decimal point before the conversion
      prod.store :price_public, price.inner_text.tr(',', '.').to_f
    end
    ## should be (table/'td[text()="Abgabehinweis:"]').first
    #  -> but Nokogiri apparently can't handle that
    if hint = following_cell_text(table, "Abgabehinweis:")
      prod.store :code_prescription, !!/Rezeptpflichtig/.match(hint)
    end
    ## should be (table/'td[text()="Packungsinhalt:"]').first
    #  -> but Nokogiri apparently can't handle that
    if size_str = following_cell_text(table, "Packungsinhalt:")
      if match = /\s*(.*)\s+(\S+)\s+(\S+)\s*$/.match(size_str)
        size = utf8 match[1]
        unit = utf8 match[2]
        name = utf8 match[3]
        # only two tokens present: they are size and unit, not unit and name
        if size.empty?
          size, unit, name = unit, name, nil
        end
        prod.update :size => size, :unit => unit, :unitname => name
      end
    end
    if company = (table/'a[@class="liste"]').first
      prod.store :company, utf8(company.inner_text)
    end
    data.push prod
  end
  data
end
# Returns the text of the first td that follows the cell labelled +label+
# in +table+, or nil when either the label cell or its sibling is missing.
def following_cell_text table, label
  cell = td_with_text(table, label)
  return unless cell
  td, = cell.xpath('following-sibling::td')
  td && td.inner_text
end
private :following_cell_text
|
122
|
+
# Returns three summary lines built from the run counters: packages
# checked, packages updated and companies created.
def report
  [
    ["Checked %5i Packages", @count],
    ["Updated %5i Packages", @found],
    ["Created %5i Companies", @created_companies],
  ].collect { |template, counter| sprintf(template, counter) }
end
|
130
|
+
# Runs the shop's keyword search for +term+ and returns the product data
# extracted from the result page.
#
# A Zlib::GzipFile::Error is retried up to three times; after that, and
# for any other StandardError, the requested URL is appended to the
# error's message and the same error is raised again.
def search agent, term
  retries_left = 3
  begin
    url = "#@host/advanced_search_result.php?keywords=#{term}"
    extract_data(agent.get(url))
  rescue Zlib::GzipFile::Error => err
    if retries_left > 0
      retries_left -= 1
      retry
    end
    err.message << " after 3 retries - url: #{url}"
    raise err
  rescue StandardError => err
    err.message << " url: #{url}"
    raise err
  end
end
|
147
|
+
# Returns the first td below +table+ whose whitespace-stripped text equals
# +text+, or nil when no cell matches.
def td_with_text table, text
  cells = table/'td'
  matching = cells.find_all { |cell| cell.text.strip == text }
  matching.first
end
|
153
|
+
# Looks +package+ up in the web shop (by its :cid code for 'DE') and
# copies name, prescription flag, prices, size and company from the first
# search hit.
#
# A package is only looked up when opts[:all] is set, when it has no
# public price, or when its public price originates from :pharma24 or
# :csv_product_infos. @count is incremented per lookup, @found per hit.
def update_package agent, package, opts={}
  price = package.price(:public)
  resale = [ :pharma24,
    :csv_product_infos ].include?(package.data_origin(:price_public))
  needs_update = opts[:all] || price.nil? || resale
  if needs_update && (code = package.code(:cid, 'DE'))
    @count += 1
    # only the first search hit is used
    data, = search agent, code.value
    if data
      @found += 1
      package.name.de = u(data[:name])
      presc = data[:code_prescription]
      if(code = package.code(:prescription))
        if(code.value != presc)
          code.value = presc
        end
      else
        package.add_code Util::Code.new(:prescription, presc, 'DE')
      end
      amount = data[:price_public]
      # :price_public is absent when the result table had no price node;
      # comparing nil with 0 would raise NoMethodError
      if(amount && amount > 0)
        update_price package, :public, amount
        if presc
          update_price package, :exfactory, package._price_exfactory
        end
      end
      import_size data, package
      package.save
      if((product = package.product) && product.company.nil?)
        product.company = import_company(data)
        product.save
      end
    end
  end
end
|
188
|
+
# Sets the 'DE' price of kind +type+ on +package+ to +amount+ and records
# :pharma24 as the origin of that price when something changed.
#
# Nothing happens when the recorded origin of the price contains an '@'
# (the price has been edited manually) or when the existing price already
# equals +amount+.
def update_price package, type, amount
  origin_key = :"price_#{type}"
  origin = package.data_origin(origin_key)
  # if this price has been edited manually we won't overwrite
  return if origin && origin.to_s.include?('@')
  existing = package.price(type, 'DE')
  if !existing
    package.add_price(Util::Money.new(amount, type, 'DE'))
  elsif existing != amount
    existing.amount = amount
  else
    return
  end
  package.data_origins.store origin_key, :pharma24
end
|
209
|
+
end
|
210
|
+
end
|
211
|
+
end
|
@@ -0,0 +1,1186 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Import::PharmNet -- de.oddb.org -- 15.10.2007 -- hwyss@ywesee.com
|
3
|
+
|
4
|
+
require 'fileutils'
|
5
|
+
require 'htmlentities'
|
6
|
+
require 'mechanize'
|
7
|
+
require 'oddb/import/importer'
|
8
|
+
require 'oddb/import/rtf'
|
9
|
+
require 'oddb/util/mail'
|
10
|
+
require 'pp'
|
11
|
+
|
12
|
+
module ODDB
|
13
|
+
module Import
|
14
|
+
module PharmNet
|
15
|
+
# Mechanize page parser for latin1 pages: converts the body to utf8,
# rewrites the declared charset accordingly and decodes a fixed set of
# accented-vowel HTML entities before handing the body to Mechanize::Page.
class EncodedParser < Mechanize::Page
  @@iconv = Iconv.new('utf8', 'latin1')
  ## HtmlEntities seems to kill the parser, do it manually for now
  #htmlentities = HTMLEntities.new
  #body = htmlentities.decode(body)
  # Named entities that are replaced by their literal character.
  ENTITIES = {
    '&aacute;' => 'á',
    '&agrave;' => 'à',
    '&auml;'   => 'ä',
    '&eacute;' => 'é',
    '&egrave;' => 'è',
    '&euml;'   => 'ë',
    '&iacute;' => 'í',
    '&igrave;' => 'ì',
    '&iuml;'   => 'ï',
    '&oacute;' => 'ó',
    '&ograve;' => 'ò',
    '&ouml;'   => 'ö',
    '&uacute;' => 'ú',
    '&ugrave;' => 'ù',
    '&uuml;'   => 'ü',
  }
  # Same parameters as Mechanize::Page#initialize. A nil +body+ (the
  # default) is passed on untouched instead of raising NoMethodError.
  def initialize(uri=nil, response=nil, body=nil, code=nil)
    if body
      # replace the charset declaration first, then convert the bytes
      body = @@iconv.iconv(body.gsub(/iso-8859-1/i, 'utf-8'))
      ENTITIES.each { |entity, char| body.gsub! entity, char }
    end
    super(uri, response, body, code)
  end
end
|
40
|
+
# Delegator around an HTTP agent whose wrapped object can be replaced by
# a brand-new instance of the same class (see #renew!).
class RenewableAgent < SimpleDelegator
  # Wraps +agent+ and immediately replaces it with a fresh instance.
  def initialize agent
    super
    renew!
  end
  # Creates a new agent of the wrapped agent's class, configures it with a
  # randomly picked entry of ODDB.config.http_proxies (if there is one)
  # and with EncodedParser as HTML parser, and delegates to it from now on.
  def renew!
    replacement = __getobj__.class.new
    pool = ODDB.config.http_proxies
    proxy_host, proxy_port = pool.at rand(pool.size)
    if proxy_host
      ODDB.logger.debug('PharmNet') {
        "Using proxy server #{proxy_host}:#{proxy_port}"
      }
      replacement.set_proxy proxy_host, proxy_port
    end
    replacement.pluggable_parser.html = EncodedParser
    __setobj__ replacement
  end
end
|
59
|
+
class TermedRtf < Rtf
  # Remembers the search term the document was found with, so subclasses
  # can read it as @term.
  # NOTE(review): Rtf#initialize is not invoked here - confirm intentional.
  def initialize(term)
    @term = term
  end
end
|
64
|
+
class FiParser < TermedRtf
  # Heading patterns and the chapter name they map to. The order matters:
  # the first pattern that matches a heading wins.
  CHAPTER_PATTERNS = [
    [/^1[08]\.?\s*Stand/i, 'date'],
    [/^14\.?\s*Sonstige\s+Hinweise/i, 'other_advice'],
    [/^(2|11)\.?\s*(Verschreibung|Verkauf)/i, 'sale_limitation'],
    [/^1\.?\s*Bezeichnung/i, 'name'],
    [/^[23]\.?\s*(Qualitative|Zusammensetzung)/i, 'composition'],
    [/^3\.?\s*Darreichung/i, 'galenic_form'],
    [/^3\.1\.?\s*Stoff/i, 'substance_group'],
    [/^3\.2\.?\s*(Arzneilich|Bestandteile)/i, 'active_agents'],
    [/^4(\.1)?\.?\s*Anwendung/i, 'indications'],
    [/^(10|4\.2)\.?\s*Dosierung/i, 'dosage'],
    [/^11\.?\s*Art\s+und\s+Dauer/i, 'application'],
    [/^(5|4\.3)\.?\s*Gegenanzeigen/i, 'counterindications'],
    [/^(8|4\.4)\.?\s*(Besondere\s+)?Warnhinweise/i, 'precautions'],
    [/^(7|4\.5)\.?\s*Wechselwirkungen/i, 'interactions'],
    [/^4\.6\.?\s*(Anwendung|Schwangerschaft)/i, 'pregnancy'],
    [/^4\.7\.?\s*Auswirkung/i, 'driving_ability'],
    [/^(6|4\.8)\.?\s*Nebenwirkungen/i, 'unwanted_effects'],
    [/^(12|4\.9)\.?\s*(Notfall|Überdosierung)/i, 'overdose'],
    [/^4\.?\s*Klinisch/i, 'clinical'],
    [/^5\.1\.?\s*Pharmakodynamisch/i, 'pharmacodynamics'],
    [/^13\.2\.?\s*Toxikologisch/i, 'toxicology'],
    [/^(13\.3|5\.2)\.?\s*Pharmakokineti(sch|k)/i, 'pharmacokinetics'],
    [/^13\.4\.?\s*Bioverfügbarkeit/i, 'bioavailability'],
    [/^5\.3\.?\s*Präklinisch/i, 'preclinicals'],
    [/^(13|5)\.?\s*Pharmakologisch/i, 'pharmacology'],
    [/^(3\.3|6\.1)\.?\s*(Liste|Hilfsstoffe?|Sonstige\s+Bestandteile)/i, 'excipients'],
    [/^(9|6\.2)\.?\s*(Wichtigste\s+)?Inkompatibilitäten/i, 'incompatibilities'],
    [/^(15|6\.3)\.?\s*(Dauer|Haltbarkeit)/i, 'shelf_life'],
    [/^(16|6\.4)\.?\s*(Besondere|Lagerung|Aufbewahrung)/i, 'storage'],
    [/^6\.5\.?\s*(Art|Behältnis)/i, 'packaging'],
    # this pattern used to be the only case-sensitive one in the list
    [/^17\.?\s*Darreichungsformen\s+und\sPackung/i, 'packaging'],
    [/^6\.6\.?\s*(Besondere|Hinweis|Entsorgung)/i, 'disposal'],
    [/^6\.?\sPharmazeutisch/i, 'pharmaceutic'],
    [/^(19|7)\.?\s*(Name|Pharmazeutischer|Inhaber)/i, 'company'],
    [/^20\.?\s*(Name|Hersteller)/i, 'producer'],
    [/^8\.?\s*Zulassung/i, 'registration'],
    [/^9\.?\s*Datum/i, 'registration_date'],
    [/^zusätzliche Angaben/i, 'additional_information'],
  ].freeze

  # Maps a heading to a chapter name via CHAPTER_PATTERNS and adds that
  # chapter to @document unless it already exists, then defers to the
  # superclass implementation with the same argument.
  def identify_chapter buffer
    _, name = CHAPTER_PATTERNS.find { |pattern, _| pattern === buffer }
    if name && !@document.chapter(name)
      @document.add_chapter Text::Chapter.new(name)
    end
    super
  end

  # At the start of an empty paragraph, strips a leading marker (B or F
  # followed by one or two capitals/digits) and leading whitespace from
  # +value+ in place.
  def _sanitize_text(value)
    if @buffer.empty? && @buffer.is_a?(Text::Paragraph)
      value.gsub!(/^([BF][A-Z0-9]{1,2})?\s*/, '')
    end
  end
end
|
154
|
+
class PiParser < TermedRtf
  # Headings that are only recognised when the heading mentions @term.
  # First match wins; a nil name means "recognised, but not a chapter".
  TERM_HEADINGS = [
    [/wof(ü|Ü|ue)r\s+(wird|werden)\s+(es|sie)\s+(angewendet|eingenommen)/i, 'indications'],
    [/wird\s+angewendet$/i, 'indications'],
    [/^(3\.?\s*)?Wie\s+(ist|sind).+?(anzuwenden|einzunehmen)\?/i, 'application'],
    [/vor\s+der\s+(Anwendung|Einnahme)\s+von/i, 'precautions'],
    [/^([56]\.?\s*)?Wie\s+(ist|sind).+?aufzubewahren/i, 'storage'],
    [/^Bitte\s.+für\s+Kinder\s+nicht\s+erreichbar/i, 'personal'],
    [/^([45]\.?\s*)?Welche\s+Nebenwirkungen/i, 'unwanted_effects'],
    [/^Nebenwirkungen:?$/i, 'unwanted_effects'],
    ## prevent composition if this is a dodgy match
    [/Behandlungserfolg/i, nil],
  ].freeze

  # Headings recognised when the heading does not mention @term.
  GENERIC_HEADINGS = [
    [/^([45]\.?\s*)?Welche\s+Nebenwirkungen/i, 'unwanted_effects'],
    [/^Nebenwirkungen:?$/i, 'unwanted_effects'],
    [/^(4\.?\s*)?Verhalten\s+im\s+Notfall/i, 'emergency'],
    [/^(6\.?\s*)?(Weitere\s+)?(Informationen|Angaben)/i, 'additional_information'],
    [/^(6\.?\s*)?Gebrauchsanleitung/i, 'additional_information'],
    [/^Zusätzliche\s+Informationen/i, 'additional_information'],
    [/^Anwendungsgebiete/i, 'indications'],
    [/^Vorsichtsma(ss|ß)nahmen/i, 'precautions'],
    [/^Dosierung\s*($|und)/i, 'application'],
    [/^Dosierungsanleitung/, 'application'],
    [/Angaben\s+zur\s+Haltbarkeit/i, 'storage'],
    [/^Gegenanzeigen/i, 'counterindications'],
    [/^Darreichungsform/i, 'packaging'],
    [/^(Hersteller.+)?Pharmazeutischer\s+Unternehmer/i, 'company'],
    [/^Pharmazeutischer\s+Hersteller/i, 'company'],
    [/^\s*Stand\b/, 'date'],
    [/wurde\s+zuletzt\s+überarbeitet/i, 'date'],
    [/^(Sehr\s+geehrte|Liebe)r?\s+Patient/i, 'personal'],
    [/^Bitte\s.+für\s+Kinder\s+nicht\s+erreichbar/i, 'personal'],
  ].freeze

  # Derives a chapter name from a heading and prepares @document for it,
  # then defers to the superclass implementation with the same argument.
  # A heading mentioning @term that matches none of TERM_HEADINGS starts
  # the 'composition' chapter. Other chapters are only opened once a
  # 'composition' chapter exists.
  def identify_chapter buffer
    name = if /\b#{@term}\b/i.match(buffer)
      hit = TERM_HEADINGS.find { |pattern, _| pattern === buffer }
      hit ? hit.last : 'composition'
    else
      _, generic = GENERIC_HEADINGS.find { |pattern, _| pattern === buffer }
      generic
    end
    composition = @document.chapter('composition')
    if name && (name == 'composition' || composition)
      existing = @document.chapter(name)
      if existing.nil?
        @document.add_chapter Text::Chapter.new(name)
      elsif _heading_only?(existing)
        ## some PI insert a document-overview after the composition, in which
        #  case we have an erroneous chapter, identified by only consisting of
        #  a heading. In that case the stub is folded into the composition
        #  and the chapter is started afresh.
        composition.append existing
        @document.remove_chapter existing
        @document.add_chapter Text::Chapter.new(name)
      end
    end
    super
  end

  # True when +chapter+ has exactly one non-blank paragraph and that
  # paragraph starts with a digit.
  def _heading_only?(chapter)
    filled = chapter.paragraphs.reject { |par| par.to_s.strip.empty? }
    filled.size == 1 && /^\d+/.match(filled.first)
  end

  # Resets the parser (init) when +value+ indicates that everything read so
  # far was preamble, and strips a leading marker (P followed by one or two
  # capitals/digits) plus leading whitespace at the start of an empty
  # paragraph.
  def _sanitize_text(value)
    ## some rtfs have unusable information prior to the actual PI
    if /^PCX\b/ === value
      init
    elsif /Gebrauchsinformation/ === value
      init if /Recyclinglogo/.match(current_chapter.to_s)
    end
    if @buffer.empty? && @buffer.is_a?(Text::Paragraph)
      value.gsub!(/^([P][A-Z0-9]{1,2})?\b/, '')
      value.lstrip!
    end
  end
end
|
242
|
+
class Importer < Importer
|
243
|
+
ERROR_EXPLANATIONS = {
|
244
|
+
"execution expired" => "the server stopped responding.",
|
245
|
+
"503 => Net::HTTPServiceUnavailable" => "the server is unavailable: http://en.wikipedia.org/wiki/HTTP_503#5xx_Server_Error",
|
246
|
+
"Invalid RTF-File: Text before rtf-version" => "the link pointed to a file that could not be parsed as RTF (probably a PDF)",
|
247
|
+
"Multiple assignment of Registration-Number" => <<-EOS,
|
248
|
+
there is already a Registration in the system with this Registration-Number.
|
249
|
+
The two Registrations should probably be merged manually.
|
250
|
+
EOS
|
251
|
+
}
|
252
|
+
attr_reader :errors
|
253
|
+
# Sets up caches, counters and the on-disk locations used by the import
# (creating the directories if necessary), then calls the superclass
# initializer without arguments.
def initialize
  # company-name noise that is removed before names are compared
  @stop = /(Pharma(ceuticals|zeutische\s*Fabrik)?|Arzneim(ittel|\.)|GmbH|[u&]\.?\s*Co\.?|Kg|Ltd\.?|')\s*/i
  @htmlentities = HTMLEntities.new

  # lookup tables
  @result_cache = {}
  @distance_cache = {}
  @errors = {}
  @sources = {}

  # per-key counters
  @assigned = Hash.new(0)
  @removed = Hash.new(0)
  @not_removed = Hash.new(0)

  # plain counters
  @repaired = 0
  @reparsed_fis = 0
  @reparsed_pis = 0
  @products_created = 0
  @sequences_created = 0
  @packages_created = 0

  # storage locations
  @archive = File.join(ODDB.config.var, 'rtf', 'pharmnet')
  FileUtils.mkdir_p(@archive)
  @latest = File.join(ODDB.config.var, 'html', 'pharmnet', 'latest.html')
  FileUtils.mkdir_p(File.dirname(@latest))
  super
end
|
275
|
+
# Downloads and assigns the document of type +key+ (:fachinfo or :patinfo)
# referenced by data[key] to +sequence+.
#
# Without a url in +data+ the call is delegated to remove_info. Parsed
# documents with 5 chapters or fewer are discarded (also via remove_info).
# Any error is logged and recorded in @errors instead of being raised; the
# handler tolerates a nil +sequence+.
def assign_info(key, agent, data, sequence, opts)
  url = data[key]
  return remove_info(key, sequence, opts) unless url

  sequence.send "#{key}_url=", "http://gripsdb.dimdi.de#{url}"
  term = data[:search_term]
  doc = import_rtf key, agent, url, term, opts
  doc.date = data[:"date_#{key}"]
  # arbitrary cutoff: fachinfos with less than 5 chapters can't be right...
  if doc.chapters.size > 5
    _assign_info key, doc, sequence, opts
  else
    ODDB.logger.debug('PharmNet') {
      sprintf("Discarding %s for %s (%s)", key, sequence_name(sequence), term)
    }
    remove_info key, sequence, opts
  end
rescue Timeout::Error, StandardError => error
  # keep whatever was set before the failure; a nil sequence must not make
  # the handler itself raise
  sequence.save if sequence
  ODDB.logger.error('PharmNet') {
    sprintf("%s: %s", error.class, error.message) << "\n" << error.backtrace.join("\n")
  }
  (@errors[error.message[0,42]] ||= []).push [ sequence ? sequence_name(sequence) : '',
    error.message, error.backtrace.find { |ln| /pharmnet/.match ln }.to_s.strip, url ]
end
|
299
|
+
# Stores +doc+ as the German version of the sequence's +key+ document
# (:fachinfo / :patinfo), provided the current document is empty or
# opts[:replace] is set. Counts the assignment in @assigned and saves the
# document, the info container and the sequence.
def _assign_info(key, doc, sequence, opts={})
  info = sequence.send(key)
  if info.empty? || opts[:replace]
    ODDB.logger.debug('PharmNet') {
      sprintf("Assigning %s to %s", key, sequence_name(sequence))
    }
    info.de = doc
    @assigned[key] += 1
    [doc, info].each { |record| record.save }
    sequence.save
  end
end
|
312
|
+
# Sets the EU registration code of +sequence+ to +registration+ and saves
# the sequence. Does nothing when +registration+ is nil or already equal to
# the sequence's EU registration code.
#
# Raises a RuntimeError when the number passes unique_registration? but is
# already held by another sequence.
def assign_registration(sequence, registration)
  return unless registration \
    && sequence.code(:registration, 'EU') != registration

  ODDB.logger.debug('PharmNet') {
    sprintf('Assigning Registration-Number %s to %s',
            registration, sequence_name(sequence))
  }
  if unique_registration? registration
    conflict = Drugs::Sequence.find_by_code(:value   => registration,
                                            :type    => 'registration',
                                            :country => 'EU')
    if conflict && conflict != sequence
      raise sprintf("Multiple assignment of Registration-Number %s (%s-%i/%s-%i)",
                    registration, sequence_name(sequence), sequence.odba_id,
                    conflict.name.de, conflict.odba_id)
    end
  end
  existing = sequence.code(:registration, 'EU')
  if existing
    existing.value = registration
  else
    sequence.add_code Util::Code.new(:registration, registration, 'EU')
  end
  sequence.save
end
|
336
|
+
# Picks the search result that fits +sequence+ best.
#
# Results are pre-filtered by suitable_data, scored with
# composition_relevance, and of all results sharing the highest score the
# one with the newest document date (fachinfo, else patinfo) is returned.
# Returns nil when there is no result with a relevance above 0.
def best_data(sequence, result)
  sname = sequence.name
  sname = sequence.product.name unless sname.de
  gf = sequence.galenic_forms.first
  comp = sequence.company
  comparison = [ sname, gf && gf.description, comp && comp.name ].collect { |ml|
    ml ? ml.de : ''
  }
  suitable = suitable_data comparison, result,
                           :subcount => sequence.active_agents.size
  # keep the score of every candidate: previously ties were dropped, because
  # the scoring block only returned a value for a new maximum
  scored = suitable.collect { |data|
    [ composition_relevance(sequence.active_agents, data), data ]
  }
  best = scored.collect { |rel, _| rel }.max
  return unless best && best > 0

  contenders = scored.select { |rel, _| rel == best }.collect { |_, data| data }
  # undated results sort before any dated one
  contenders.max_by { |data|
    data[:date_fachinfo] || data[:date_patinfo] || Date.new
  }
end
|
361
|
+
# Scores how well an active agent matches one composition detail: the sum
# of a dose score and a substance-name score.
#
# The dose score is 1 when the agent has no dose (0) or both doses are
# equal, otherwise the ratio of the smaller to the larger dose. The name
# score is the ngram_similarity of both substance names with
# "hydrochlorid" removed.
def _composition_paired_relevance(agent, detail)
  adose = agent.dose.to_f
  ddose = detail[:dose].to_f
  drel = begin
    if adose == 0 || adose == ddose
      1
    elsif adose < ddose
      adose / ddose
    else
      ddose / adose
    end
  rescue StandardError
    0
  end
  ignore = /hydrochlorid/
  srel = ngram_similarity(agent.substance.name.de.gsub(ignore, ''),
                          detail[:substance].gsub(ignore, ''))
  drel + srel
end
|
378
|
+
# Scores how well +agents+ match data[:composition].
#
# Every agent/detail combination is scored, then every pairing produced by
# exclusive_permutation is summed up. The first pairing with the highest
# sum is stored in data[:pairs], and the sum divided by the number of
# participants (the larger of both list sizes) is stored in
# data[:relevance] and returned.
def composition_relevance(agents, data)
  details = data[:composition]
  participants = [agents.size, details.size].max
  scores = {}
  agents.each_with_index do |agent, aidx|
    details.each_with_index do |detail, didx|
      scores[[aidx, didx]] = _composition_paired_relevance(agent, detail)
    end
  end
  best = 0
  exclusive_permutation(participants).each do |pairs|
    total = 0
    # indices without a counterpart have no score and count as 0.0
    pairs.each { |pair| total += scores[pair].to_f }
    if total > best
      data.store :pairs, pairs
      best = total
    end
  end
  data.store :relevance, best / participants
end
|
400
|
+
# Builds and saves a new Drugs::Sequence for +product+ from a search
# result: one composition in +galform+ with an active agent per entry in
# data[:composition]. The sequence is named after the leading non-numeric
# part of the result's product name plus the company name with @stop
# removed. +term+ is accepted but not used.
def create_sequence(term, data, company, product, galform)
  pname, = data[:data]
  official = pname[/^[^\d(]+/].strip
  company_name = company.name.de.gsub(@stop, '').strip
  @sequences_created += 1

  sequence = Drugs::Sequence.new
  composition = Drugs::Composition.new
  composition.sequence = sequence
  composition.galenic_form = galform
  data[:composition].each do |act|
    agent = Drugs::ActiveAgent.new import_substance(act[:substance]), act[:dose]
    agent.composition = composition
    agent.save
  end
  composition.save

  sequence.name.de = [ official, company_name ].join(' ')
  sequence.marketable = data[:marketable]
  sequence.product = product
  sequence.save
  sequence
end
|
423
|
+
# Returns the pairings of the indices 0...participants with themselves, as
# produced by _exclusive_permutation.
def exclusive_permutation(participants)
  indices = participants.times.to_a
  _exclusive_permutation(indices, indices.dup)
end
|
428
|
+
# Returns every one-to-one pairing of the elements of +left+ with the
# elements of +right+ as a list of [left, right] pairs. Both lists are
# expected to have the same size.
#
# Each pairing is produced exactly once, with the left elements in their
# given order (n! results). Previously every pairing was also produced in
# all n! orderings of its pairs, i.e. (n!)**2 results with equal sums.
def _exclusive_permutation(left, right)
  return [] if left.empty?

  first, *remaining = left
  return [[[first, right.first]]] if remaining.empty?

  result = []
  right.each do |second|
    others = right.reject { |val| val == second }
    _exclusive_permutation(remaining, others).each do |rest|
      result.push [[first, second]].concat(rest)
    end
  end
  result
end
|
445
|
+
# Collects the details of a result page into a hash: the newest
# Fachinformation / Gebrauchsinformation links, the composition rows of the
# first table with border='1' (header row skipped), the registration number
# and the marketable flag. Without such a table only the link data is
# returned.
def extract_details(page)
  data = {}
  _extract_newest_link(data, :fachinfo, "Fachinformation", page)
  _extract_newest_link(data, :patinfo, "Gebrauchsinformation", page)
  table = (page/"table[@border='1']").first
  return data unless table

  rows = (table/"tr")[1..-1] || []
  composition = rows.collect do |row|
    spans = row/"span"
    ask_nr = _extract_details(spans[0])
    substance = _extract_details(spans[1])
    dose = parse_dose(_extract_details(spans[2]))
    { :ask_nr => ask_nr, :substance => substance, :dose => dose }
  end
  data.store :composition, composition

  # values follow their label in the next span of class 'wbtxt'
  label = ''
  (page/"span[@class='wbtxt']").each do |span|
    text = span.inner_text
    if /Reg\.?-Nr\.?/ === label
      data.store :registration, text
    elsif /Verkehrsf/ === label
      data.store :marketable, text.include?('ja')
    end
    label = text
  end
  data
end
|
472
|
+
# Returns the entity-decoded inner HTML of +span+ with tabs, newlines and
# the byte sequence \302\240 removed.
def _extract_details(span)
  decoded = @htmlentities.decode(span.inner_html)
  decoded.gsub(/[\t\n]|\302\240/, '')
end
|
475
|
+
# Finds the most recent link on +page+ whose text contains +search+ and a
# date (dd.mm.yyyy), and stores its date under :"date_<key>" and its href
# under +key+ in +data+. Links without a date or with an impossible date
# are ignored. Returns +data+, or nil when no dated link was found.
def _extract_newest_link(data, key, search, page)
  dated = []
  page.links.each do |link|
    text = link.text.to_s
    next unless /#{search}\b/i.match(text)
    stamp = text[/(\d{2}\.){2}\d{4}/]
    next unless stamp

    day, month, year = stamp.split('.').collect { |num| num.to_i }
    next unless Date.valid_date?(year, month, day)

    dated.push [Date.new(year, month, day), link.href]
  end
  # newest date wins; equal dates are ordered by href, as before
  newest = dated.max_by { |date, href| [date, href.to_s] }
  if newest
    data.update :"date_#{key}" => newest.first, key => newest.last
  end
end
|
488
|
+
def extract_result(agent, page)
|
489
|
+
form = page.form("titlesForm")
|
490
|
+
node = form.form_node
|
491
|
+
result = _extract_result node
|
492
|
+
hrefs = (node/"a").select { |link|
|
493
|
+
/^\d*1(-\d+)?$/.match link.inner_text
|
494
|
+
}.collect { |link|
|
495
|
+
link["href"]
|
496
|
+
}.sort.uniq[1..-1]
|
497
|
+
if(hrefs)
|
498
|
+
hrefs.each_with_index { |href, idx|
|
499
|
+
page = agent.get href
|
500
|
+
result.concat _extract_result(page.form("titlesForm").form_node)
|
501
|
+
}
|
502
|
+
end
|
503
|
+
result
|
504
|
+
end
|
505
|
+
# Turns the rows of a result table (all rows except the first two and the
# last three) into hashes holding the title attributes of the row's spans
# (:data) and the href of its first named anchor (:href).
def _extract_result node
  rows = (node/"tr")[2..-4] || []
  rows.collect do |row|
    titles = (row/"td//span[@title]").collect { |span| span["title"] }
    anchor = (row/"a[@name]").first
    { :data => titles, :href => anchor["href"] }
  end
end
|
514
|
+
# Fills in missing doses of +agents+ from data[:composition], following
# the agent/detail pairing in data[:pairs].
#
# Only agents without a dose (nil or quantity 0) are touched. If the
# substances are equal, the dose is copied. Otherwise, unless the agent
# already has a chemical equivalence, the old substance/dose is kept as the
# chemical equivalence and substance and dose are taken from the detail.
# Pairs pointing beyond either list (the lists may differ in size) and a
# missing :pairs entry are skipped instead of raising.
def fix_composition(agents, data)
  details = data[:composition] || []
  (data[:pairs] || []).each do |aidx, didx|
    agent = agents[aidx]
    detail = details[didx]
    next unless agent && detail
    next unless agent.dose.nil? || agent.dose.qty == 0

    if agent.substance == detail[:substance]
      agent.dose = detail[:dose]
      agent.save
      @repaired += 1
    elsif !agent.chemical_equivalence
      agent.chemical_equivalence = Drugs::ActiveAgent.new agent.substance, agent.dose
      agent.chemical_equivalence.save
      agent.substance = import_substance detail[:substance]
      agent.dose = detail[:dose]
      agent.save
      @repaired += 1
    end
  end
end
|
536
|
+
# Submits the "titlesForm" of +page+ so that it requests the detail page
# behind result[:href]: the form action becomes the href's path, and each
# query parameter is copied into the form field of the same name, if one
# exists. Returns the submitted form's response. +agent+ is not used.
def get_details(agent, page, result)
  form = page.form("titlesForm")
  form.field("parinfo").value = 'true'
  form.field("docBaseName").value = form.field('baseName').value
  form.field("magicrequestid").value = rand.to_s
  target = URI.parse result[:href]
  form.action = target.path
  target.query.split('&').each do |param|
    key, value = param.split('=', 2)
    field = form.field(key)
    field.value = value if field
  end
  form.submit
end
|
551
|
+
# Navigates from the PharmNet index page through the "akzeptieren" (but
# not "nicht akzeptieren") link to the search page and returns its
# "search_form", with the action set to the href of the link with id
# 'goME'.
def get_search_form(agent)
  index = "http://www.pharmnet-bund.de/dynamic/de/am-info-system/index.html"
  terms_page = agent.get(index).form("pharmnet_amis_off_ppv").submit
  accept = terms_page.links.find { |l| /(?<!nicht )akzeptieren/i.match l.text }
  search_page = accept.click
  form = search_page.form("search_form")
  target = search_page.links.find { |l| l.attributes["id"] == 'goME' }
  form.action = target.href
  form
end
|
563
|
+
# Searches PharmNet for +term+, dropping the last word of the term after
# each unsuccessful round (a round tries the term as is and with
# whitespace replaced by '-'). Every result is tagged with the successful
# :search_term. Returns nil once the term is shorter than 3 characters.
#
# On Timeout::Error or a "ServerError" message the search is retried up to
# opts[:retries] times with a fresh agent session, waiting
# opts[:retry_unit] * 4**attempt seconds in between. Any other error, or
# running out of retries, is recorded in @errors and yields nil.
def get_search_result(agent, term, sequence=nil, opts={})
  opts = { :info_unrestricted => false,
           :repair => false, :retries => 3,
           :retry_unit => 60 }.merge opts
  good = nil
  term = term.dup   # shortened in place below; leave the caller's string alone
  ODDB.logger.debug('PharmNet') { sprintf('Searching for %s', term) }
  result = []
  while result.empty?
    return if term.length < 3
    good = term.dup
    result.concat search(agent, term, sequence, opts)
    if result.empty?
      good = term.gsub(/\s+/, '-')
      result.concat search(agent, good, sequence, opts)
    end
    term.gsub!(/\s*[^\s]+$/, '')
  end
  result.each { |data| data.store(:search_term, good) }
  result
rescue Timeout::Error, StandardError => error
  ODDB.logger.error('PharmNet') {
    sprintf("%s: %s", error.class, error.message) << "\n" << error.backtrace.join("\n")
  }
  # survives `retry`, so it counts down across attempts
  retries ||= opts[:retries]
  recoverable = error.is_a?(Timeout::Error) || /ServerError/.match(error.message)
  if recoverable && retries > 0
    seconds = opts[:retry_unit] * 4 ** (opts[:retries] - retries)
    ODDB.logger.debug('PharmNet') {
      sprintf("Waiting %i seconds for the server to recover...", seconds)
    }
    sleep seconds
    retries -= 1
    ODDB.logger.debug('PharmNet') {
      "Renewing Mechanize-agent and starting a new Session" }
    agent.renew!
    @search_form = nil
    retry
  else
    (@errors[error.message[0,42]] ||= []).push [ sequence ? sequence_name(sequence) : '',
      error.message, error.backtrace.find { |ln| /pharmnet/.match ln }.to_s.strip ]
  end
  nil
end
|
607
|
+
# Returns the search result for +term+ that describes +sequence+: the only
# result if there is exactly one, otherwise whatever best_data selects.
# Returns nil when the search yields nothing.
def identify_details(agent, term, sequence=nil,
                     opts = { :info_unrestricted => false,
                              :repair => false, :retries => 3})
  result = get_search_result(agent, term, sequence, opts)
  return unless result

  result.size == 1 ? result.first : best_data(sequence, result)
end
|
618
|
+
# Looks up the product of +company+ that a search result belongs to,
# trying in this order: official name plus company, official name, search
# term plus company, search term. For each candidate name an exact match
# is preferred over the first search hit of the same company.
#
# If nothing is found, a new product named "<term> <company>" is created;
# in that case the return value is the result of Product#save.
def identify_product(term, data, company)
  pname, = data[:data]
  official = pname[/^[^\d(]+/].strip
  company_name = company.name.de.gsub(@stop, '').strip
  term_with_company = [ term, company_name ].join(' ')
  names = [ [ official, company_name ].join(' '), official, term_with_company, term ]
  names.each do |cnd|
    exact = Drugs::Product.find_by_name(cnd)
    return exact if exact && exact.company == company

    Drugs::Product.search_by_name(cnd).each do |found|
      return found if found.company == company
    end
  end
  ## if we can't find a product, we'll have to create a new one.
  @products_created += 1
  product = Drugs::Product.new
  product.name.de = term_with_company
  product.company = company
  product.save
end
|
643
|
+
# Returns the first sequence of +product+ that has exactly one
# composition, the same (sorted, non-nil) doses as data[:composition] and
# +galform+ as its only galenic form. Returns nil without a product or a
# matching sequence.
def identify_sequence(data, product, galform)
  return unless product

  doses = data[:composition].collect { |act| act[:dose] }.compact.sort
  product.sequences.find do |seq|
    seq.compositions.size == 1 \
      && seq.doses.compact.sort == doses \
      && seq.galenic_forms == [galform]
  end
end
|
653
|
+
# Runs _import and mails its report to the admins, using the current time
# and this class as the subject.
def import(agent, sequences, opts = { :replace => false,
                                      :reload  => false,
                                      :remove  => false,
                                      :repair  => false,
                                      :reparse => false,
                                      :reparse_patinfo => false,
                                      :retries => 3,
                                      :retry_unit => 60 })
  # the subject is built first, so it carries the start time of the run
  subject = sprintf("%s: %s", Time.now.strftime('%c'), self.class)
  Util::Mail.notify_admins subject, _import(agent, sequences, opts)
end
|
664
|
+
# Processes all named +sequences+ in name order and returns the report.
#
# With opts[:resume], only sequences whose downcased name sorts at or
# after the (downcased) resume value are processed. Sequences are reduced
# to their odba_id up front and re-fetched one at a time; fetches or
# processing steps that fail with ODBA::OdbaError are skipped. An empty
# selection yields just the report.
def _import(agent, sequences, opts = { :replace => false,
                                       :reload  => false,
                                       :remove  => false,
                                       :repair  => false,
                                       :reparse => false,
                                       :reparse_patinfo => false,
                                       :retries => 3,
                                       :retry_unit => 60 })
  agent = RenewableAgent.new agent
  resume = opts[:resume]
  resume = resume.to_s.downcase if resume
  sequences = sequences.select { |sequence|
    name = sequence_name(sequence)
    name && (!resume || name.downcase >= resume)
  }
  sequences = sequences.sort_by { |sequence| sequence_name(sequence) }
  count = 0
  # nothing may qualify (e.g. resume beyond the last name)
  head = sequences.empty? ? nil : sequences.first.name
  @checked = "Checked 0 Sequences"
  ## let odba cache release unneeded sequences ...
  sequences.collect! { |sequence| sequence.odba_id }
  while odba_id = sequences.shift
    begin
      ## ... and refetch them when necessary
      sequence = ODBA.cache.fetch(odba_id)
      count += 1
      @checked = sprintf "Checked %i Sequences from '%s' to '%s'",
                         count, head, sequence_name(sequence)
      process(agent, sequence, opts)
    rescue ODBA::OdbaError
      # deliberately skipped: continue with the next sequence
    end
  end
  report
end
|
704
|
+
# Finds or creates the company called +name+.
#
# The name is looked up with @stop removed: first exactly, then by
# searching with a term that loses its last word each round, accepting
# the first hit whose cleaned name has an ngram_similarity above 0.8 with
# the cleaned input. A match gets +name+ as a synonym; otherwise a new
# company with that name is created. The company is saved either way.
def import_company(name)
  clean = name.gsub(@stop, '').strip
  term = clean
  company = Business::Company.find_by_name(term)
  while company.nil? && !term.empty?
    hits = Business::Company.search_by_name(term)
    company = hits.find { |hit|
      ngram_similarity(clean, hit.name.de.gsub(@stop, '')) > 0.8
    }
    # drop the last word for the next round
    term = term.gsub(/(^|\s)+\S+\s*$/, '')
  end
  if company
    company.name.add_synonym name
  else
    company = Business::Company.new
    company.name.de = name
  end
  company.save
  company
end
|
723
|
+
# Finds or creates the galenic form described by +description+: an exact
# match is returned as is; the first search hit with an ngram_similarity
# above 0.75 gets +description+ as a synonym and is saved; otherwise a new
# galenic form is created and saved.
def import_galenic_form(description)
  exact = Drugs::GalenicForm.find_by_description(description)
  return exact if exact

  similar = Drugs::GalenicForm.search_by_description(description).find { |gf|
    ngram_similarity(description, gf.description.de) > 0.75
  }
  if similar
    similar.description.add_synonym description
    similar.save
    return similar
  end

  created = Drugs::GalenicForm.new
  created.description.de = description
  created.save
  created
end
|
742
|
+
# Searches PharmNet for +term+ and imports every result: the matching
# sequence is looked up (by unique registration number, else via company,
# product and galenic form) or created, then registration, Fachinfo,
# Patinfo and package are assigned. Returns the report.
#
# With opts[:repair], existing sequences get their marketable flag, name
# and composition updated from the result. The name is only rebuilt when
# the sequence has a product (and thus a company to name it after).
def import_missing(agent, term, opts={})
  @checked = "Searched for FIs/GIs for '#{term}'"
  opts = { :skip_totals => true }.merge opts
  agent = RenewableAgent.new agent
  if result = get_search_result(agent, term, nil, opts)
    result.each do |data|
      company, product, galform = nil
      sequence = nil
      registration = data[:registration]
      if registration && unique_registration?(registration)
        sequence = Drugs::Sequence.find_by_code :value => registration
      end
      unless sequence
        _, gfname, cname = data[:data]
        galform = import_galenic_form gfname
        company = import_company cname
        product = identify_product term, data, company
        sequence = identify_sequence data, product, galform
      end
      if sequence
        if opts[:repair]
          pname, _, cname = data[:data]
          sequence.marketable = data[:marketable]
          # a sequence without product used to raise NoMethodError here
          if product = sequence.product
            product.company ||= import_company cname
            company_name = product.company.name.de.gsub(@stop, '').strip
            official = pname[/^[^\d(]+/].strip
            sequence.name.de = [ official, company_name ].join(' ')
          end
          agents = sequence.active_agents
          # called for its side effect: stores the pairing in data[:pairs]
          composition_relevance agents, data
          fix_composition agents, data
        end
      else
        sequence = create_sequence term, data, company, product, galform
      end
      assign_registration sequence, data[:registration]
      assign_info(:fachinfo, agent, data, sequence, opts)
      assign_info(:patinfo, agent, data, sequence, opts)
      import_package sequence, data, opts
    end
  end
  report opts
end
|
786
|
+
# Ensures +sequence+ has a package of the size encoded in the result's
# product name ("<name> - OP<size>" or "<name> - OP(<multi>x<size>)",
# optionally followed by "(<unit>)"). Returns the existing or newly created
# package, or nil when the name carries no package information.
# NOTE(review): the <multi> factor is parsed but not applied to the package
# - confirm whether multi-packs should be sized differently.
def import_package(sequence, data, opts={})
  pname, gfname, _ = data[:data]
  match = /^(?<name>.*?)\s*-\s*OP((?<size>\d+)|\((?<multi>\d+)x(?<size>\d+)\))(\((?<unit>[^)]+)\))?$/i.match(pname)
  return unless match

  size = match[:size].to_i
  existing = sequence.packages.find { |pac| pac.size == size }
  return existing unless existing.nil?

  @packages_created += 1
  package = Drugs::Package.new
  package.add_code Util::Code.new(:cid, "oddb#{package.uid}", 'DE')
  package.name.de = match[:name]
  part = Drugs::Part.new
  part.size = size
  part.unit = import_unit gfname
  part.package = package
  part.composition = sequence.compositions.first
  part.save
  package.sequence = sequence
  package.save
  package
end
|
811
|
+
# Returns the Text::Document for +url+, parsing the RTF behind it if
# necessary.
#
# A stored document is reused unless opts[:reparse] is set and the url has
# not been parsed yet during this run. The RTF is read from the local
# archive, or downloaded (and archived) when opts[:reload] is set or no
# archived copy exists. Parsing is repeated with a shortened term (last
# dot-separated part dropped) until at least 4 chapters are found or the
# term is used up. The source handle is closed once parsing is done.
def import_rtf(key, agent, url, term, opts = { :reparse => false,
                                               :reload  => false})
  pklass = case key
           when :fachinfo
             FiParser
           when :patinfo
             PiParser
           end
  path = File.join @archive, File.basename(url)
  doc = Text::Document.find_by_source(url)
  ODDB.logger.debug('PharmNet') {
    sprintf('Comparing %s-sources for %s', key, term) }
  if doc.nil? || (opts[:reparse] && !@sources[url])
    @sources.store url, true
    io = nil
    chapters = []
    parsed = nil
    begin
      if opts[:reload] || !File.exist?(path)
        uri = URI.parse url
        uri.scheme = 'http'
        if uri.host.to_s.empty?
          uri.host = 'gripsdb.dimdi.de'
        end
        ODDB.logger.debug('PharmNet') {
          sprintf('Downloading %s for %s from %s', key, term, uri.to_s) }
        file = agent.get uri.to_s
        file.save path
        ODDB.logger.debug('PharmNet') {
          sprintf('Saving %s for %s in %s', key, term, path) }
        io = StringIO.new(file.body)
      else
        ODDB.logger.debug('PharmNet') {
          sprintf('Reading %s for %s from %s', key, term, path) }
        io = File.open(path)
      end
      term = term.downcase.gsub(/[\s-]/, '.')
      while !term.empty? && chapters.size < 4
        ODDB.logger.debug('PharmNet') {
          sprintf('Parsing %s with term: %s', key, term) }
        io.rewind
        parsed = pklass.new(term).import io
        chapters = parsed.chapters
        term = term.gsub(/(\A|\.)[^.]*$/, '')
      end
    ensure
      # the archive file handle used to stay open
      io.close if io && !io.closed?
    end
    ## ensure that chapter-headings are bold
    parsed.chapters.each { |chapter|
      if (paragraph = chapter.paragraphs.first) \
        && (format = paragraph.formats.first)
        format.augment "b"
      end
    }
    parsed.source = url
    if doc
      doc.chapters.replace chapters
      doc.save
    else
      doc = parsed
    end
  end
  doc
end
|
872
|
+
# Returns the substance called +name+, creating and saving it if it does
# not exist yet.
def import_substance(name)
  known = Drugs::Substance.find_by_name name
  return known if known

  substance = Drugs::Substance.new
  substance.name.de = name
  substance.save
  substance
end
|
881
|
+
# Finds or creates the unit called +name+: an exact match is returned as
# is; the first search hit with an ngram_similarity above 0.75 gets +name+
# as a synonym and is saved; otherwise a new unit is created and saved.
def import_unit(name)
  exact = Drugs::Unit.find_by_name name
  return exact if exact

  similar = Drugs::Unit.search_by_name(name).find { |unt|
    ngram_similarity(name, unt.name.de) > 0.75
  }
  if similar
    similar.name.add_synonym name
    similar.save
    return similar
  end

  created = Drugs::Unit.new
  created.name.de = name
  created.save
  created
end
|
900
|
+
# Returns the share (0.0..1.0) of the n-character windows of the longer
# string that also occur in the shorter one. Both strings are passed
# through u, downcased and stripped of whitespace, commas, dots, dashes
# and slashes first. A string shorter than +n+ counts as one window.
def ngram_similarity(str1, str2, n=5)
  longer, shorter = [str1, str2].collect { |str|
    u(str).downcase.gsub(/[\s,.\-\/]+/, '')
  }
  longer, shorter = shorter, longer if longer.length < shorter.length
  parts = [ longer.length - n, 0 ].max + 1
  hits = (0...parts).select { |idx| shorter.include? longer[idx, n] }.size
  hits.to_f / parts
end
|
915
|
+
# Builds a Drugs::Dose from a string such as "12.5 mg" or "500 mg".
#
# The quantity is the leading decimal number, or a leading whole number
# that is not followed by '.', ',' or further digits; anything else gives
# a quantity of 0.0. Whole numbers used to parse as 0.0 because a decimal
# point was required. The unit is the trailing run of characters that are
# neither digits nor dots. nil is treated as an empty string.
def parse_dose(str)
  str = str.to_s
  qty = str[/^(\d*\.\d*|\d+(?![.,\d]))/].to_f
  Drugs::Dose.new(qty, str[/[^\d\.]+$/])
end
|
918
|
+
# Processes one sequence.
#
# * With opts[:reparse_patinfo] or opts[:reparse] only the stored Patinfo
#   resp. Fachinfo is reparsed and that result is returned.
# * Otherwise nothing happens unless the sequence's fachinfo or patinfo is
#   empty, or opts[:replace] / opts[:remove] is set.
# * Details are looked up via identify_details; without details, or with a
#   composition relevance of 1.25 or less, remove_infos is called instead.
# * Fachinfo and Patinfo are assigned, the composition is fixed when
#   opts[:repair] is set, and the registration is assigned only when the
#   relevance is at least 2.
#
# Timeout::Error and StandardError are logged and collected in @errors,
# keyed by the first 42 characters of the error message; they are not
# re-raised.
def process(agent, sequence, opts = { :replace         => false,
                                      :reload          => false,
                                      :remove          => false,
                                      :repair          => false,
                                      :reparse         => false,
                                      :reparse_patinfo => false,
                                      :retries         => 3,
                                      :retry_unit      => 60 })
  if opts[:reparse_patinfo]
    return reparse_patinfo(agent, sequence)
  elsif opts[:reparse]
    return reparse_fachinfo(agent, sequence)
  end

  incomplete = sequence.fachinfo.empty? || sequence.patinfo.empty?
  return unless incomplete || opts[:replace] || opts[:remove]

  data = identify_details(agent, sequence_name(sequence), sequence, opts)
  return remove_infos(sequence, opts) unless data

  cutoff = composition_relevance(sequence.active_agents, data)
  return remove_infos(sequence, opts) if cutoff <= 1.25 # arbitrary value

  assign_info(:fachinfo, agent, data, sequence, opts)
  assign_info(:patinfo, agent, data, sequence, opts)

  fix_composition(sequence.active_agents, data) if opts[:repair]

  # the registration number is only taken over for a really good match
  return if cutoff < 2 # arbitrary value
  assign_registration(sequence, data[:registration])
rescue Timeout::Error, StandardError => error
  ODDB.logger.error('PharmNet') {
    sprintf("%s: %s", error.class, error.message) << "\n" << error.backtrace.join("\n")
  }
  bucket = (@errors[error.message[0,42]] ||= [])
  name = sequence_name(sequence)
  origin = error.backtrace.find { |ln| /pharmnet/.match ln }.to_s.strip
  bucket.push [ name, error.message, origin ]
end
|
953
|
+
# Removes the German document of one info type from +sequence+.
#
# key      - :fachinfo or :patinfo; sent to +sequence+ to obtain the info
# sequence - the sequence whose info is inspected
# opts     - only opts[:remove] is read
#
# Nothing happens when the info has no German document. With opts[:remove]
# the document is cleared, the sequence saved and @removed[key] incremented;
# without it only @not_removed[key] is incremented.
#
# The debug message names the info type actually being removed (it used to
# say "Fachinfo" for Patinfos as well).
def remove_info(key, sequence, opts)
  info = sequence.send(key)
  return unless info.de

  if opts[:remove]
    @removed[key] += 1
    ODDB.logger.debug('PharmNet') {
      sprintf('Removing %s from %s', key.to_s.capitalize, sequence_name(sequence))
    }
    info.de = nil
    sequence.save
  else
    @not_removed[key] += 1
  end
end
|
966
|
+
# Runs remove_info for the Fachinfo and then the Patinfo of +sequence+ and
# returns the result of the Patinfo call.
def remove_infos(sequence, opts)
  [ :fachinfo, :patinfo ].collect { |key|
    remove_info(key, sequence, opts)
  }.last
end
|
970
|
+
# Re-imports the German Fachinfo of +sequence+ from its recorded source.
#
# Does nothing (returns nil) when the sequence has no German Fachinfo, the
# Fachinfo has no source, or import_rtf yields no document. Otherwise the
# chapters of the stored Fachinfo are replaced with the freshly imported
# ones, the Fachinfo is saved and @reparsed_fis is incremented.
def reparse_fachinfo(agent, sequence)
  info = sequence.fachinfo.de
  return unless info
  source = info.source
  return unless source
  doc = import_rtf(:fachinfo, agent, source, sequence_name(sequence),
                   :reparse => true)
  return unless doc

  @reparsed_fis += 1
  info.chapters.replace doc.chapters
  info.save
end
|
979
|
+
# Re-imports the German Patinfo of +sequence+ from its recorded source.
#
# Does nothing (returns nil) when the sequence has no German Patinfo, the
# Patinfo has no source, or import_rtf yields no document. Otherwise the
# chapters of the stored Patinfo are replaced with the freshly imported
# ones, the Patinfo is saved and @reparsed_pis is incremented.
def reparse_patinfo(agent, sequence)
  info = sequence.patinfo.de
  return unless info
  source = info.source
  return unless source
  doc = import_rtf(:patinfo, agent, source, sequence_name(sequence),
                   :reparse => true)
  return unless doc

  @reparsed_pis += 1
  info.chapters.replace doc.chapters
  info.save
end
|
988
|
+
# Builds the import report as an Array of lines.
#
# The first part lists the counters kept in instance variables (assigned,
# removed, kept, created, reparsed, repaired) followed by one summary line
# per distinct error key. After that comes one section per error key with
# an optional explanation from ERROR_EXPLANATIONS and one line per recorded
# instance.
#
# Unless opts[:skip_totals] is set, all Drugs::Sequence records are visited
# to count sequences with a German Fachinfo/Patinfo and their distinct
# sources; with it, the two "Total:" lines are left out.
def report(opts = {})
  skip_totals = opts[:skip_totals]
  fi_sources = {}
  pi_sources = {}
  fi_count = 0
  pi_count = 0
  unless skip_totals
    Drugs::Sequence.all do |sequence|
      fachinfo = sequence.fachinfo.de
      if fachinfo
        fi_count += 1
        fi_sources[fachinfo.source] = true
      end
      patinfo = sequence.patinfo.de
      if patinfo
        pi_count += 1
        pi_sources[patinfo.source] = true
      end
    end
  end

  fi_total = pi_total = nil
  unless skip_totals
    fi_total = "Total: #{fi_sources.size} Fachinfos linked to #{fi_count} Sequences"
    pi_total = "Total: #{pi_sources.size} Patinfos linked to #{pi_count} Sequences"
  end
  error_count = @errors.values.inject(0) { |sum, errs| sum + errs.size }

  # nil entries (missing @checked, skipped totals) are dropped by compact
  lines = [
    @checked,
    "",
    "Assigned #{@assigned[:fachinfo]} Fachinfos",
    "Removed #{@removed[:fachinfo]} Fachinfos",
    "Kept #{@not_removed[:fachinfo]} unconfirmed Fachinfos",
    fi_total,
    "",
    "Assigned #{@assigned[:patinfo]} Patinfos",
    "Removed #{@removed[:patinfo]} Patinfos",
    "Kept #{@not_removed[:patinfo]} unconfirmed Patinfos",
    pi_total,
    "",
    "Created #{@products_created} Products",
    "Created #{@sequences_created} Sequences",
    "Created #{@packages_created} Packages",
    "",
    "Reparsed #{@reparsed_fis} Fachinfos",
    "Reparsed #{@reparsed_pis} Patinfos",
    "Repaired #{@repaired} Active Agents",
    "",
    "Errors: #{error_count}",
  ].compact

  details = []
  @errors.sort.each do |key, instances|
    heading = "#{instances.size} x #{key}"
    lines.push " - #{heading}"
    details.push "", "#{heading}:"
    explanation = ERROR_EXPLANATIONS[key]
    details.push "This means that #{explanation}" if explanation
    details.push ''
    instances.each do |name, message, line, link|
      details.push sprintf("%s: %s (%s) -> http://gripsdb.dimdi.de%s",
                           name, message, line, link)
    end
  end
  lines.concat details
end
|
1044
|
+
# Fills +term+ into the form's 'term' field, submits the form and returns
# whatever the submission returns.
def result_page(form, term)
  input = form.field('term')
  input.value = term
  form.submit
end
|
1048
|
+
# Searches for +term+ (downcased) and returns an Array of detail Hashes, one
# per result, each being the result data merged with its extracted details
# and without the :href key.
#
# Results are cached in @result_cache by term. On a cache miss, entries whose
# key sorts before the first three characters of the term are dropped from
# the cache before searching.
#
# If the result page does not belong to the searched term, the agent is
# renewed, a new search form is fetched and the search is repeated once; if
# that fails as well an empty Array is returned without being cached.
def search(agent, term, sequence=nil, opts={})
  term = term.downcase
  @result_cache.fetch(term) do
    prefix = term[0,3]
    @result_cache.delete_if { |key, _| key < prefix } if prefix
    @search_form ||= get_search_form agent
    # When active agents need repairing we want every result, otherwise only
    # those that come with a Fach- or PatInfo to parse.
    unrestricted = opts[:info_unrestricted] \
      || (opts[:repair] && sequence && sequence.active_agents.any? { |act|
            act.dose.qty == 0 })
    fi_only = unrestricted ? 'NO_RESTRICTION' : 'YES'
    set_fi_only(@search_form, fi_only)
    details = agent.transact {
      page = result_page @search_form, term
      found = _search_invalid?(page, term)
      if found
        ODDB.logger.error('PharmNet') {
          sprintf "Searched for '%s' but got result for '%s' - creating new session",
                  term, found
        }
        agent.renew!
        @search_form = get_search_form agent
        set_fi_only(@search_form, fi_only)
        page = result_page @search_form, term
        # leaves the whole method, bypassing the cache
        return [] if _search_invalid?(page, term)
      end
      page.save @latest
      extract_result(agent, page).collect do |data|
        details_page = get_details agent, page, data
        detail = data.merge extract_details(details_page)
        detail.delete :href
        detail
      end
    }
    @result_cache.store term, details
  end
end
|
1090
|
+
# Checks whether +page+ shows the result for +term+.
#
# Returns nil when the last "div.wbsectionsubtitlebar" element contains
# "Arzneimittelname: <term>?" (case-insensitive). Returns '' when the page
# has no such element, and otherwise the "Arzneimittelname:..." text found in
# the element (up to the first question mark), or nil if it has none.
def _search_invalid?(page, term)
  subtitle = (page/"div.wbsectionsubtitlebar").last
  return '' if subtitle.nil?

  expected = /Arzneimittelname:\s#{Regexp.escape(term)}\?/i
  return nil if expected.match(subtitle.inner_text)

  subtitle.inner_text[/Arzneimittelname:[^?]+/]
end
|
1098
|
+
# Returns the German name of +sequence+, falling back to the German name of
# its product. Returns nil for a nil sequence or when neither is available.
def sequence_name(sequence)
  return unless sequence

  own_name = sequence.name.de
  return own_name if own_name

  product = sequence.product
  product.name.de if product
end
|
1107
|
+
# Checks every radio button of +form+ that is named "WFTYP" and carries the
# value +status+.
def set_fi_only(form, status="YES")
  form.radiobuttons.each do |button|
    next unless button.name == "WFTYP"
    button.check if button.value == status
  end
end
|
1114
|
+
# Returns those entries of +selection+ that match +comparison+ best.
#
# Every entry is scored with _suitable_data; entries for which that returns
# nil are discarded. The score of an entry is the sum of the returned values,
# and all entries sharing the highest score are returned in their original
# order. Progress is written to the debug log.
def suitable_data(comparison, selection, opts = {})
  ODDB.logger.debug('PharmNet') {
    "Checking for suitable data in #{selection.size} results"
  }
  best = 0
  scored = []
  selection.each do |data|
    dists = _suitable_data(data, comparison, opts)
    next unless dists
    total = dists.inject { |a, b| a + b }
    best = total if total > best
    scored.push [ total, data ]
  end
  ODDB.logger.debug('PharmNet') {
    "Found a preselection of #{scored.size} results"
  }
  result = scored.select { |total, _| total == best }.collect { |_, data| data }
  ODDB.logger.debug('PharmNet') {
    "Returning the best #{result.size} results"
  }
  result
end
|
1143
|
+
def _suitable_data(data, comparison, opts)
|
1144
|
+
opts[:cutoff] ||= 0.25
|
1145
|
+
idx = 0
|
1146
|
+
raw = data[:data].dup
|
1147
|
+
comp = comparison.dup
|
1148
|
+
|
1149
|
+
unless(opts[:keep_dose])
|
1150
|
+
part = Regexp.escape(raw[1].to_s).gsub('\ ', ')|(')
|
1151
|
+
ptrn = /(#{part})|(\b\d+\s*m?g(\s*\/\s*\d+\s*h)?)[\-\s]*/i
|
1152
|
+
raw[0] = raw[0].gsub(ptrn, '')
|
1153
|
+
comp[0] = comp[0].gsub(ptrn, '')
|
1154
|
+
end
|
1155
|
+
|
1156
|
+
tabl = /([a-z]{4,})tab.*/i
|
1157
|
+
raw[1] = raw[1].to_s.gsub(tabl, '\1')
|
1158
|
+
# Import::Csv::ProductInfos passes a comparison without Galenic Form if
|
1159
|
+
# no suitable data is found on the first try
|
1160
|
+
if comp[1]
|
1161
|
+
comp[1] = comp[1].to_s.gsub(tabl, '\1')
|
1162
|
+
end
|
1163
|
+
dists = raw.collect { |str|
|
1164
|
+
str = str.to_s
|
1165
|
+
othr = comparison[idx]
|
1166
|
+
other = othr ? othr.to_s : str
|
1167
|
+
idx += 1
|
1168
|
+
|
1169
|
+
relevance = ngram_similarity str.gsub(@stop, ''), other.gsub(@stop, '')
|
1170
|
+
return if relevance < opts[:cutoff]
|
1171
|
+
relevance
|
1172
|
+
}
|
1173
|
+
if(subcount = opts[:subcount])
|
1174
|
+
cdist = (comp = data[:composition]) ? (subcount - comp.size).abs : subcount
|
1175
|
+
dists.push(cdist) unless cdist > 0
|
1176
|
+
else
|
1177
|
+
dists
|
1178
|
+
end
|
1179
|
+
end
|
1180
|
+
# True unless the string form of +code+ has a line starting with "EU".
def unique_registration?(code)
  code.to_s !~ /^EU/
end
|
1183
|
+
end
|
1184
|
+
end
|
1185
|
+
end
|
1186
|
+
end
|