de.oddb 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. data/Guide.txt +3 -0
  2. data/History.txt +5 -0
  3. data/LICENCE.txt +339 -0
  4. data/Manifest.txt +430 -0
  5. data/README +423 -0
  6. data/README.txt +25 -0
  7. data/Rakefile +28 -0
  8. data/bin/admin +71 -0
  9. data/bin/exportd +44 -0
  10. data/bin/oddbd +33 -0
  11. data/data/fulltext/data/dicts/french/fulltext.aff +1057 -0
  12. data/data/fulltext/data/dicts/french/fulltext.dict +91189 -0
  13. data/data/fulltext/data/dicts/french/fulltext.stop +135 -0
  14. data/data/fulltext/data/dicts/german/fulltext.aff +1233 -0
  15. data/data/fulltext/data/dicts/german/fulltext.dict +287574 -0
  16. data/data/fulltext/data/dicts/german/fulltext.stop +133 -0
  17. data/data/fulltext/data/german_compound/README +15 -0
  18. data/data/fulltext/data/german_compound/compound.pl +63 -0
  19. data/data/fulltext/data/german_compound/german.stop +20 -0
  20. data/data/fulltext/data/ispell-german-compound.tar.gz +0 -0
  21. data/data/fulltext/redist/dict_french/Makefile +12 -0
  22. data/data/fulltext/redist/dict_french/README.french +1 -0
  23. data/data/fulltext/redist/dict_french/dict_french.sql.in +7 -0
  24. data/data/fulltext/redist/dict_french/dict_snowball.c +56 -0
  25. data/data/fulltext/redist/dict_french/french_stem.c +1222 -0
  26. data/data/fulltext/redist/dict_french/french_stem.h +16 -0
  27. data/data/fulltext/redist/dict_french/subinclude.h +2 -0
  28. data/data/fulltext/redist/dict_german/Makefile +12 -0
  29. data/data/fulltext/redist/dict_german/README.german +1 -0
  30. data/data/fulltext/redist/dict_german/dict_german.sql.in +7 -0
  31. data/data/fulltext/redist/dict_german/dict_snowball.c +56 -0
  32. data/data/fulltext/redist/dict_german/german_stem.c +527 -0
  33. data/data/fulltext/redist/dict_german/german_stem.h +16 -0
  34. data/data/fulltext/redist/dict_german/subinclude.h +1 -0
  35. data/data/fulltext/redist/french_stem.c +1222 -0
  36. data/data/fulltext/redist/french_stem.h +16 -0
  37. data/data/fulltext/redist/german_stem.c +527 -0
  38. data/data/fulltext/redist/german_stem.h +16 -0
  39. data/jobs/export_chde_xls +20 -0
  40. data/jobs/export_csv +20 -0
  41. data/jobs/export_fachinfo_yaml +20 -0
  42. data/jobs/export_patinfo_yaml +20 -0
  43. data/jobs/export_yaml +20 -0
  44. data/jobs/import_dimdi +15 -0
  45. data/jobs/import_gkv +19 -0
  46. data/jobs/import_pharma24 +15 -0
  47. data/jobs/import_pharmnet +30 -0
  48. data/jobs/import_whocc +18 -0
  49. data/lib/fixes/singular.rb +9 -0
  50. data/lib/fixes/yaml.rb +13 -0
  51. data/lib/oddb.rb +13 -0
  52. data/lib/oddb/business/company.rb +18 -0
  53. data/lib/oddb/business/grant_download.rb +27 -0
  54. data/lib/oddb/business/invoice.rb +75 -0
  55. data/lib/oddb/config.rb +112 -0
  56. data/lib/oddb/currency.rb +6 -0
  57. data/lib/oddb/drugs.rb +16 -0
  58. data/lib/oddb/drugs/active_agent.rb +37 -0
  59. data/lib/oddb/drugs/atc.rb +53 -0
  60. data/lib/oddb/drugs/composition.rb +41 -0
  61. data/lib/oddb/drugs/ddd.rb +24 -0
  62. data/lib/oddb/drugs/dose.rb +107 -0
  63. data/lib/oddb/drugs/galenic_form.rb +21 -0
  64. data/lib/oddb/drugs/galenic_group.rb +17 -0
  65. data/lib/oddb/drugs/package.rb +111 -0
  66. data/lib/oddb/drugs/part.rb +55 -0
  67. data/lib/oddb/drugs/product.rb +25 -0
  68. data/lib/oddb/drugs/sequence.rb +68 -0
  69. data/lib/oddb/drugs/substance.rb +31 -0
  70. data/lib/oddb/drugs/substance_group.rb +13 -0
  71. data/lib/oddb/drugs/unit.rb +12 -0
  72. data/lib/oddb/export.rb +4 -0
  73. data/lib/oddb/export/csv.rb +94 -0
  74. data/lib/oddb/export/l10n_sessions.rb +30 -0
  75. data/lib/oddb/export/rss.rb +44 -0
  76. data/lib/oddb/export/server.rb +137 -0
  77. data/lib/oddb/export/xls.rb +127 -0
  78. data/lib/oddb/export/yaml.rb +212 -0
  79. data/lib/oddb/html/state/download.rb +13 -0
  80. data/lib/oddb/html/state/drugs/admin/package.rb +190 -0
  81. data/lib/oddb/html/state/drugs/admin/product.rb +56 -0
  82. data/lib/oddb/html/state/drugs/admin/sequence.rb +253 -0
  83. data/lib/oddb/html/state/drugs/ajax/explain_ddd_price.rb +19 -0
  84. data/lib/oddb/html/state/drugs/ajax/explain_price.rb +19 -0
  85. data/lib/oddb/html/state/drugs/ajax/global.rb +18 -0
  86. data/lib/oddb/html/state/drugs/ajax/package_infos.rb +19 -0
  87. data/lib/oddb/html/state/drugs/ajax/remote_infos.rb +19 -0
  88. data/lib/oddb/html/state/drugs/atc_browser.rb +39 -0
  89. data/lib/oddb/html/state/drugs/atc_guidelines.rb +21 -0
  90. data/lib/oddb/html/state/drugs/compare.rb +52 -0
  91. data/lib/oddb/html/state/drugs/download_export.rb +18 -0
  92. data/lib/oddb/html/state/drugs/downloads.rb +42 -0
  93. data/lib/oddb/html/state/drugs/fachinfo.rb +21 -0
  94. data/lib/oddb/html/state/drugs/feedback.rb +91 -0
  95. data/lib/oddb/html/state/drugs/global.rb +270 -0
  96. data/lib/oddb/html/state/drugs/init.rb +18 -0
  97. data/lib/oddb/html/state/drugs/login.rb +17 -0
  98. data/lib/oddb/html/state/drugs/package.rb +32 -0
  99. data/lib/oddb/html/state/drugs/patinfo.rb +21 -0
  100. data/lib/oddb/html/state/drugs/products.rb +51 -0
  101. data/lib/oddb/html/state/drugs/result.rb +125 -0
  102. data/lib/oddb/html/state/global.rb +206 -0
  103. data/lib/oddb/html/state/global_predefine.rb +17 -0
  104. data/lib/oddb/html/state/limit.rb +17 -0
  105. data/lib/oddb/html/state/login.rb +56 -0
  106. data/lib/oddb/html/state/paypal/checkout.rb +97 -0
  107. data/lib/oddb/html/state/paypal/collect.rb +19 -0
  108. data/lib/oddb/html/state/paypal/download.rb +61 -0
  109. data/lib/oddb/html/state/paypal/redirect.rb +18 -0
  110. data/lib/oddb/html/state/register_download.rb +24 -0
  111. data/lib/oddb/html/state/register_export.rb +38 -0
  112. data/lib/oddb/html/state/register_poweruser.rb +17 -0
  113. data/lib/oddb/html/state/viral/admin.rb +79 -0
  114. data/lib/oddb/html/state/viral/poweruser.rb +16 -0
  115. data/lib/oddb/html/util/annotated_list.rb +39 -0
  116. data/lib/oddb/html/util/know_it_all.rb +28 -0
  117. data/lib/oddb/html/util/known_user.rb +55 -0
  118. data/lib/oddb/html/util/lookandfeel.rb +698 -0
  119. data/lib/oddb/html/util/need_all_input.rb +29 -0
  120. data/lib/oddb/html/util/session.rb +84 -0
  121. data/lib/oddb/html/util/sort.rb +72 -0
  122. data/lib/oddb/html/util/unsaved_helper.rb +20 -0
  123. data/lib/oddb/html/util/validator.rb +59 -0
  124. data/lib/oddb/html/view/ajax/json.rb +22 -0
  125. data/lib/oddb/html/view/alpha_header.rb +28 -0
  126. data/lib/oddb/html/view/document.rb +117 -0
  127. data/lib/oddb/html/view/download.rb +33 -0
  128. data/lib/oddb/html/view/drugs/admin/package.rb +245 -0
  129. data/lib/oddb/html/view/drugs/admin/product.rb +104 -0
  130. data/lib/oddb/html/view/drugs/admin/sequence.rb +305 -0
  131. data/lib/oddb/html/view/drugs/ajax/explain_ddd_price.rb +87 -0
  132. data/lib/oddb/html/view/drugs/ajax/explain_price.rb +61 -0
  133. data/lib/oddb/html/view/drugs/ajax/package_infos.rb +105 -0
  134. data/lib/oddb/html/view/drugs/ajax/remote_infos.rb +44 -0
  135. data/lib/oddb/html/view/drugs/atc_browser.rb +68 -0
  136. data/lib/oddb/html/view/drugs/atc_guidelines.rb +94 -0
  137. data/lib/oddb/html/view/drugs/compare.rb +95 -0
  138. data/lib/oddb/html/view/drugs/download_export.rb +28 -0
  139. data/lib/oddb/html/view/drugs/downloads.rb +128 -0
  140. data/lib/oddb/html/view/drugs/fachinfo.rb +46 -0
  141. data/lib/oddb/html/view/drugs/feedback.rb +235 -0
  142. data/lib/oddb/html/view/drugs/init.rb +51 -0
  143. data/lib/oddb/html/view/drugs/legend.rb +24 -0
  144. data/lib/oddb/html/view/drugs/package.rb +403 -0
  145. data/lib/oddb/html/view/drugs/patinfo.rb +46 -0
  146. data/lib/oddb/html/view/drugs/products.rb +97 -0
  147. data/lib/oddb/html/view/drugs/result.rb +296 -0
  148. data/lib/oddb/html/view/drugs/search.rb +33 -0
  149. data/lib/oddb/html/view/drugs/template.rb +15 -0
  150. data/lib/oddb/html/view/foot.rb +52 -0
  151. data/lib/oddb/html/view/google.rb +23 -0
  152. data/lib/oddb/html/view/google_ads.rb +40 -0
  153. data/lib/oddb/html/view/head.rb +78 -0
  154. data/lib/oddb/html/view/limit.rb +109 -0
  155. data/lib/oddb/html/view/list.rb +59 -0
  156. data/lib/oddb/html/view/login.rb +38 -0
  157. data/lib/oddb/html/view/navigation.rb +67 -0
  158. data/lib/oddb/html/view/offset_header.rb +35 -0
  159. data/lib/oddb/html/view/paypal/collect.rb +95 -0
  160. data/lib/oddb/html/view/paypal/redirect.rb +51 -0
  161. data/lib/oddb/html/view/paypal/register_form.rb +149 -0
  162. data/lib/oddb/html/view/register_download.rb +29 -0
  163. data/lib/oddb/html/view/register_export.rb +29 -0
  164. data/lib/oddb/html/view/register_poweruser.rb +29 -0
  165. data/lib/oddb/html/view/rss/feedback.rb +64 -0
  166. data/lib/oddb/html/view/rss_preview.rb +61 -0
  167. data/lib/oddb/html/view/search.rb +104 -0
  168. data/lib/oddb/html/view/snapback.rb +24 -0
  169. data/lib/oddb/html/view/template.rb +56 -0
  170. data/lib/oddb/import/dimdi.rb +583 -0
  171. data/lib/oddb/import/excel.rb +45 -0
  172. data/lib/oddb/import/gkv.rb +463 -0
  173. data/lib/oddb/import/importer.rb +36 -0
  174. data/lib/oddb/import/pharma24.rb +211 -0
  175. data/lib/oddb/import/pharmnet.rb +1186 -0
  176. data/lib/oddb/import/rtf.rb +409 -0
  177. data/lib/oddb/import/whocc.rb +148 -0
  178. data/lib/oddb/import/xml.rb +15 -0
  179. data/lib/oddb/model.rb +179 -0
  180. data/lib/oddb/persistence.rb +22 -0
  181. data/lib/oddb/persistence/odba.rb +32 -0
  182. data/lib/oddb/persistence/odba/business/company.rb +13 -0
  183. data/lib/oddb/persistence/odba/business/grant_download.rb +14 -0
  184. data/lib/oddb/persistence/odba/business/invoice.rb +15 -0
  185. data/lib/oddb/persistence/odba/drugs/atc.rb +15 -0
  186. data/lib/oddb/persistence/odba/drugs/galenic_form.rb +18 -0
  187. data/lib/oddb/persistence/odba/drugs/galenic_group.rb +13 -0
  188. data/lib/oddb/persistence/odba/drugs/package.rb +25 -0
  189. data/lib/oddb/persistence/odba/drugs/product.rb +13 -0
  190. data/lib/oddb/persistence/odba/drugs/sequence.rb +21 -0
  191. data/lib/oddb/persistence/odba/drugs/substance.rb +21 -0
  192. data/lib/oddb/persistence/odba/drugs/substance_group.rb +13 -0
  193. data/lib/oddb/persistence/odba/drugs/unit.rb +13 -0
  194. data/lib/oddb/persistence/odba/export.rb +26 -0
  195. data/lib/oddb/persistence/odba/model.rb +68 -0
  196. data/lib/oddb/persistence/odba/text/document.rb +11 -0
  197. data/lib/oddb/persistence/odba/util/code.rb +11 -0
  198. data/lib/oddb/persistence/odba/util/m10l_document.rb +13 -0
  199. data/lib/oddb/persistence/og.rb +16 -0
  200. data/lib/oddb/persistence/og/drugs/composition.rb +14 -0
  201. data/lib/oddb/persistence/og/drugs/product.rb +14 -0
  202. data/lib/oddb/persistence/og/drugs/sequence.rb +15 -0
  203. data/lib/oddb/persistence/og/model.rb +25 -0
  204. data/lib/oddb/persistence/og/util/multilingual.rb +13 -0
  205. data/lib/oddb/redist/rtf_tools/reader.rb +139 -0
  206. data/lib/oddb/remote/business/company.rb +17 -0
  207. data/lib/oddb/remote/drugs/active_agent.rb +27 -0
  208. data/lib/oddb/remote/drugs/atc.rb +31 -0
  209. data/lib/oddb/remote/drugs/dose.rb +8 -0
  210. data/lib/oddb/remote/drugs/galenic_form.rb +24 -0
  211. data/lib/oddb/remote/drugs/package.rb +128 -0
  212. data/lib/oddb/remote/drugs/part.rb +30 -0
  213. data/lib/oddb/remote/drugs/substance.rb +20 -0
  214. data/lib/oddb/remote/drugs/unit.rb +20 -0
  215. data/lib/oddb/remote/object.rb +36 -0
  216. data/lib/oddb/text/chapter.rb +23 -0
  217. data/lib/oddb/text/document.rb +42 -0
  218. data/lib/oddb/text/format.rb +37 -0
  219. data/lib/oddb/text/paragraph.rb +53 -0
  220. data/lib/oddb/text/picture.rb +89 -0
  221. data/lib/oddb/text/table.rb +68 -0
  222. data/lib/oddb/util.rb +9 -0
  223. data/lib/oddb/util/annotated_list.rb +37 -0
  224. data/lib/oddb/util/code.rb +69 -0
  225. data/lib/oddb/util/comparison.rb +36 -0
  226. data/lib/oddb/util/download.rb +17 -0
  227. data/lib/oddb/util/exporter.rb +8 -0
  228. data/lib/oddb/util/feedback.rb +23 -0
  229. data/lib/oddb/util/ipn.rb +53 -0
  230. data/lib/oddb/util/job.rb +23 -0
  231. data/lib/oddb/util/logger.rb +20 -0
  232. data/lib/oddb/util/m10l_document.rb +41 -0
  233. data/lib/oddb/util/mail.rb +87 -0
  234. data/lib/oddb/util/money.rb +64 -0
  235. data/lib/oddb/util/multilingual.rb +70 -0
  236. data/lib/oddb/util/quanty.rb +3 -0
  237. data/lib/oddb/util/quanty/fact.rb +242 -0
  238. data/lib/oddb/util/quanty/main.rb +164 -0
  239. data/lib/oddb/util/quanty/parse.rb +872 -0
  240. data/lib/oddb/util/quanty/units.dump +0 -0
  241. data/lib/oddb/util/server.rb +150 -0
  242. data/lib/oddb/util/smtp_tls.rb +58 -0
  243. data/lib/oddb/util/updater.rb +161 -0
  244. data/lib/oddb/util/ydim.rb +110 -0
  245. data/lib/oddb/util/yus.rb +46 -0
  246. data/test/business/test_company.rb +29 -0
  247. data/test/business/test_grant_download.rb +29 -0
  248. data/test/drugs/test_active_agent.rb +53 -0
  249. data/test/drugs/test_atc.rb +54 -0
  250. data/test/drugs/test_composition.rb +88 -0
  251. data/test/drugs/test_ddd.rb +22 -0
  252. data/test/drugs/test_dose.rb +189 -0
  253. data/test/drugs/test_galenic_form.rb +41 -0
  254. data/test/drugs/test_package.rb +172 -0
  255. data/test/drugs/test_part.rb +32 -0
  256. data/test/drugs/test_product.rb +31 -0
  257. data/test/drugs/test_sequence.rb +140 -0
  258. data/test/drugs/test_substance.rb +51 -0
  259. data/test/drugs/test_substance_group.rb +27 -0
  260. data/test/export/test_rss.rb +86 -0
  261. data/test/export/test_server.rb +163 -0
  262. data/test/export/test_xls.rb +146 -0
  263. data/test/export/test_yaml.rb +120 -0
  264. data/test/import/data/csv/products.csv +11 -0
  265. data/test/import/data/html/dimdi_index.html +400 -0
  266. data/test/import/data/html/gkv/Befreiungsliste_Arzneimittel_Versicherte.gkvnet +508 -0
  267. data/test/import/data/html/pharma24/1337397.html +754 -0
  268. data/test/import/data/html/pharma24/842756.html +570 -0
  269. data/test/import/data/html/pharma24/ac-page-10.html +2999 -0
  270. data/test/import/data/html/pharma24/ac-page-11.html +2999 -0
  271. data/test/import/data/html/pharma24/ac-page-12.html +2999 -0
  272. data/test/import/data/html/pharma24/ac-page-13.html +2999 -0
  273. data/test/import/data/html/pharma24/ac-page-14.html +2999 -0
  274. data/test/import/data/html/pharma24/ac-page-15.html +3011 -0
  275. data/test/import/data/html/pharma24/ac-page-16.html +3050 -0
  276. data/test/import/data/html/pharma24/ac-page-17.html +3285 -0
  277. data/test/import/data/html/pharma24/ac-page-18.html +3109 -0
  278. data/test/import/data/html/pharma24/ac-page-19.html +3126 -0
  279. data/test/import/data/html/pharma24/ac-page-2.html +3005 -0
  280. data/test/import/data/html/pharma24/ac-page-20.html +3007 -0
  281. data/test/import/data/html/pharma24/ac-page-21.html +2999 -0
  282. data/test/import/data/html/pharma24/ac-page-22.html +2999 -0
  283. data/test/import/data/html/pharma24/ac-page-23.html +3055 -0
  284. data/test/import/data/html/pharma24/ac-page-24.html +2999 -0
  285. data/test/import/data/html/pharma24/ac-page-25.html +3004 -0
  286. data/test/import/data/html/pharma24/ac-page-26.html +2999 -0
  287. data/test/import/data/html/pharma24/ac-page-27.html +3167 -0
  288. data/test/import/data/html/pharma24/ac-page-28.html +3236 -0
  289. data/test/import/data/html/pharma24/ac-page-29.html +3110 -0
  290. data/test/import/data/html/pharma24/ac-page-3.html +2999 -0
  291. data/test/import/data/html/pharma24/ac-page-30.html +2999 -0
  292. data/test/import/data/html/pharma24/ac-page-31.html +2999 -0
  293. data/test/import/data/html/pharma24/ac-page-32.html +2999 -0
  294. data/test/import/data/html/pharma24/ac-page-33.html +3001 -0
  295. data/test/import/data/html/pharma24/ac-page-34.html +2999 -0
  296. data/test/import/data/html/pharma24/ac-page-35.html +2999 -0
  297. data/test/import/data/html/pharma24/ac-page-36.html +2999 -0
  298. data/test/import/data/html/pharma24/ac-page-37.html +2999 -0
  299. data/test/import/data/html/pharma24/ac-page-38.html +3003 -0
  300. data/test/import/data/html/pharma24/ac-page-39.html +2999 -0
  301. data/test/import/data/html/pharma24/ac-page-4.html +2999 -0
  302. data/test/import/data/html/pharma24/ac-page-40.html +2999 -0
  303. data/test/import/data/html/pharma24/ac-page-41.html +2999 -0
  304. data/test/import/data/html/pharma24/ac-page-42.html +2999 -0
  305. data/test/import/data/html/pharma24/ac-page-43.html +2999 -0
  306. data/test/import/data/html/pharma24/ac-page-44.html +2999 -0
  307. data/test/import/data/html/pharma24/ac-page-45.html +2999 -0
  308. data/test/import/data/html/pharma24/ac-page-46.html +2999 -0
  309. data/test/import/data/html/pharma24/ac-page-47.html +2999 -0
  310. data/test/import/data/html/pharma24/ac-page-48.html +2999 -0
  311. data/test/import/data/html/pharma24/ac-page-49.html +2999 -0
  312. data/test/import/data/html/pharma24/ac-page-5.html +3168 -0
  313. data/test/import/data/html/pharma24/ac-page-50.html +2999 -0
  314. data/test/import/data/html/pharma24/ac-page-51.html +2999 -0
  315. data/test/import/data/html/pharma24/ac-page-52.html +3003 -0
  316. data/test/import/data/html/pharma24/ac-page-53.html +2999 -0
  317. data/test/import/data/html/pharma24/ac-page-54.html +3095 -0
  318. data/test/import/data/html/pharma24/ac-page-55.html +3041 -0
  319. data/test/import/data/html/pharma24/ac-page-56.html +2999 -0
  320. data/test/import/data/html/pharma24/ac-page-57.html +3001 -0
  321. data/test/import/data/html/pharma24/ac-page-58.html +3001 -0
  322. data/test/import/data/html/pharma24/ac-page-59.html +2999 -0
  323. data/test/import/data/html/pharma24/ac-page-6.html +3072 -0
  324. data/test/import/data/html/pharma24/ac-page-60.html +3001 -0
  325. data/test/import/data/html/pharma24/ac-page-61.html +3005 -0
  326. data/test/import/data/html/pharma24/ac-page-62.html +2999 -0
  327. data/test/import/data/html/pharma24/ac-page-63.html +3007 -0
  328. data/test/import/data/html/pharma24/ac-page-64.html +3007 -0
  329. data/test/import/data/html/pharma24/ac-page-65.html +2999 -0
  330. data/test/import/data/html/pharma24/ac-page-66.html +3011 -0
  331. data/test/import/data/html/pharma24/ac-page-67.html +3026 -0
  332. data/test/import/data/html/pharma24/ac-page-68.html +2999 -0
  333. data/test/import/data/html/pharma24/ac-page-69.html +3010 -0
  334. data/test/import/data/html/pharma24/ac-page-7.html +2999 -0
  335. data/test/import/data/html/pharma24/ac-page-70.html +3192 -0
  336. data/test/import/data/html/pharma24/ac-page-71.html +3133 -0
  337. data/test/import/data/html/pharma24/ac-page-72.html +2999 -0
  338. data/test/import/data/html/pharma24/ac-page-73.html +3227 -0
  339. data/test/import/data/html/pharma24/ac-page-74.html +3241 -0
  340. data/test/import/data/html/pharma24/ac-page-75.html +3227 -0
  341. data/test/import/data/html/pharma24/ac-page-76.html +3244 -0
  342. data/test/import/data/html/pharma24/ac-page-77.html +1164 -0
  343. data/test/import/data/html/pharma24/ac-page-8.html +2999 -0
  344. data/test/import/data/html/pharma24/ac-page-9.html +2999 -0
  345. data/test/import/data/html/pharma24/ac.html +2999 -0
  346. data/test/import/data/html/pharmnet/display.html +662 -0
  347. data/test/import/data/html/pharmnet/display1.html +625 -0
  348. data/test/import/data/html/pharmnet/display2.html +625 -0
  349. data/test/import/data/html/pharmnet/display3.html +625 -0
  350. data/test/import/data/html/pharmnet/display_tramal.html +634 -0
  351. data/test/import/data/html/pharmnet/empty_result.html +395 -0
  352. data/test/import/data/html/pharmnet/gate.html +246 -0
  353. data/test/import/data/html/pharmnet/index.html +258 -0
  354. data/test/import/data/html/pharmnet/paged_result_1.html +401 -0
  355. data/test/import/data/html/pharmnet/paged_result_2.html +401 -0
  356. data/test/import/data/html/pharmnet/result.html +401 -0
  357. data/test/import/data/html/pharmnet/search.html +865 -0
  358. data/test/import/data/html/pharmnet/search_filtered.html +182 -0
  359. data/test/import/data/html/whocc/A.html +56 -0
  360. data/test/import/data/html/whocc/A03.html +48 -0
  361. data/test/import/data/html/whocc/A03AB.html +48 -0
  362. data/test/import/data/html/whocc/A06AA.html +47 -0
  363. data/test/import/data/html/whocc/C03.html +47 -0
  364. data/test/import/data/html/whocc/login.html +77 -0
  365. data/test/import/data/mail/csv.mail +81 -0
  366. data/test/import/data/rtf/pharmnet/aarane.pi.rtf +648 -0
  367. data/test/import/data/rtf/pharmnet/ace_hemmer_ratio.pi.rtf +324 -0
  368. data/test/import/data/rtf/pharmnet/ace_hemmer_ratio.rtf +4816 -0
  369. data/test/import/data/rtf/pharmnet/acemetacin.pi.rtf +388 -0
  370. data/test/import/data/rtf/pharmnet/acemit.pi.rtf +240 -0
  371. data/test/import/data/rtf/pharmnet/acerbon.pi.rtf +1257 -0
  372. data/test/import/data/rtf/pharmnet/acetylcystein.pi.rtf +323 -0
  373. data/test/import/data/rtf/pharmnet/aciclo.pi.rtf +287 -0
  374. data/test/import/data/rtf/pharmnet/aciclovir.pi.rtf +236 -0
  375. data/test/import/data/rtf/pharmnet/actrapid.pi.rtf +322 -0
  376. data/test/import/data/rtf/pharmnet/amlodipin.pi.rtf +452 -0
  377. data/test/import/data/rtf/pharmnet/amlodipin.rtf +473 -0
  378. data/test/import/data/rtf/pharmnet/aspirin.pi.rtf +313 -0
  379. data/test/import/data/rtf/pharmnet/aspirin.rtf +781 -0
  380. data/test/import/data/rtf/pharmnet/baymycard.pi.rtf +447 -0
  381. data/test/import/data/rtf/pharmnet/omeprazol.pi.rtf +510 -0
  382. data/test/import/data/rtf/pharmnet/omeprazol.rtf +9216 -0
  383. data/test/import/data/rtf/pharmnet/paroxetin.pi.rtf +678 -0
  384. data/test/import/data/rtf/pharmnet/selegilin.pi.rtf +312 -0
  385. data/test/import/data/rtf/pharmnet/selegilin.rtf +683 -0
  386. data/test/import/data/rtf/pharmnet/valium.pi.rtf +387 -0
  387. data/test/import/data/txt/gkv/gkv_p1.txt +17 -0
  388. data/test/import/data/xls/darform_010706.xls +0 -0
  389. data/test/import/data/xls/fb010706.xls +0 -0
  390. data/test/import/data/xls/liste_zuzahlungsbefreite_arzneimittel_suchfunktion.xls +0 -0
  391. data/test/import/data/xls/wirkkurz_010406.xls +0 -0
  392. data/test/import/data/xml/ATC_2006.xml +47 -0
  393. data/test/import/data/xml/ATC_2006_ddd.xml +35 -0
  394. data/test/import/test_dimdi.rb +323 -0
  395. data/test/import/test_excel.rb +31 -0
  396. data/test/import/test_gkv.rb +260 -0
  397. data/test/import/test_pharma24.rb +112 -0
  398. data/test/import/test_pharmnet.rb +980 -0
  399. data/test/import/test_rtf.rb +37 -0
  400. data/test/import/test_whocc.rb +314 -0
  401. data/test/remote/drugs/test_active_agent.rb +36 -0
  402. data/test/selenium/selenium-server.jar +0 -0
  403. data/test/selenium/test_atc_browser.rb +121 -0
  404. data/test/selenium/test_atc_guidelines.rb +95 -0
  405. data/test/selenium/test_collect.rb +137 -0
  406. data/test/selenium/test_compare.rb +294 -0
  407. data/test/selenium/test_fachinfo.rb +128 -0
  408. data/test/selenium/test_feedback.rb +192 -0
  409. data/test/selenium/test_init.rb +64 -0
  410. data/test/selenium/test_limit.rb +304 -0
  411. data/test/selenium/test_login.rb +67 -0
  412. data/test/selenium/test_package.rb +516 -0
  413. data/test/selenium/test_patinfo.rb +128 -0
  414. data/test/selenium/test_product.rb +80 -0
  415. data/test/selenium/test_products.rb +141 -0
  416. data/test/selenium/test_search.rb +933 -0
  417. data/test/selenium/test_sequence.rb +513 -0
  418. data/test/selenium/unit.rb +190 -0
  419. data/test/stub/http_server.rb +144 -0
  420. data/test/stub/model.rb +173 -0
  421. data/test/suite.rb +15 -0
  422. data/test/test_model.rb +83 -0
  423. data/test/util/test_code.rb +74 -0
  424. data/test/util/test_ipn.rb +117 -0
  425. data/test/util/test_mail.rb +85 -0
  426. data/test/util/test_multilingual.rb +97 -0
  427. data/test/util/test_server.rb +94 -0
  428. data/test/util/test_updater.rb +130 -0
  429. data/test/util/test_ydim.rb +115 -0
  430. data/test/util/test_yus.rb +79 -0
  431. metadata +568 -0
@@ -0,0 +1,36 @@
1
+ require 'encoding/character/utf-8'
2
+ require 'iconv'
3
+
4
+ module ODDB
5
+ module Import
6
# Common base class for the importers in ODDB::Import: keeps a report
# list and offers helpers for normalising names and converting Latin-1
# input to UTF-8.
class Importer
  # Converter from Latin-1 to UTF-8 (used by #utf8).
  @@iconv = Iconv.new('utf8//IGNORE//TRANSLIT', 'latin1')
  # Words that #capitalize_all writes in lower case.
  @@lower = /^(and|for|in(cl)?|on|plain|with)$/i
  attr_accessor :report
  def initialize
    @report = []
    @skip_rows = 1
  end
  # Splits +str+ at word boundaries, lower-cases every part that matches
  # @@lower, capitalizes every other part and glues the parts together again.
  def capitalize_all(str)
    ## benchmarked fastest against an append (<<) solution
    parts = str.split(/\b/).collect do |part|
      if @@lower.match(part)
        part.downcase
      else
        part.capitalize
      end
    end
    parts.join
  end
  # Normalises a company name: capitalizes it, puts a space after every
  # dot that is not followed by whitespace, surrounds '/' and '&' with
  # spaces and restores the spelling of GmbH, AG and KG.
  def company_name(cname)
    cname = capitalize_all(cname.to_s)
    cname.gsub!(/\.(?!\s)/, '. ')
    cname.gsub!(/[\/&]/) { |match| " #{match} " }
    # order matters only in so far as all three run after capitalize_all
    [
      [/Gmbh/, 'GmbH'],
      [/Ag\b/, 'AG'],
      [/\bKg\b/, 'KG'],
    ].each do |pattern, spelling|
      cname.gsub!(pattern, spelling)
    end
    cname.strip!
    u(cname)
  end
  # Hook without behaviour in this class.
  def postprocess
  end
  # Converts +str+ with @@iconv and wraps the result with u();
  # returns nil when +str+ is nil or false.
  def utf8(str)
    return unless str
    u(@@iconv.iconv(str))
  end
end
35
+ end
36
+ end
@@ -0,0 +1,211 @@
1
+ #!/usr/bin/env ruby
2
+ # Import::Pharma24 -- de.oddb.org -- 21.04.2008 -- hwyss@ywesee.com
3
+
4
+ require 'oddb/import/importer'
5
+ require 'oddb/util/money'
6
+
7
+ module ODDB
8
+ module Import
9
+ class Pharma24 < Importer
10
# Sets the three report counters to zero and remembers the base URL of
# the shop that is queried. Does not call the superclass initializer.
def initialize
  @host = 'http://www.apotheke-online-internet.de'
  @count = @found = @created_companies = 0
end
16
# Updates every package in +packages+ through update_package and
# returns the report lines.
#
# agent    - HTTP agent; its history is limited to one page here
# packages - Array of packages; it is replaced in place by the packages'
#            odba_ids and emptied while the import runs
# opts     - passed on to update_package (:all forces an update)
def import(agent, packages, opts={:all => false})
  agent.max_history = 1
  # keep only the ids and fetch each package again when its turn comes
  packages.collect! { |package| package.odba_id }
  odba_id = packages.shift
  while odba_id
    update_package(agent, ODBA.cache.fetch(odba_id), opts)
    odba_id = packages.shift
  end
  report
end
24
# Looks up the company named in data[:company] (normalised with
# company_name). When none is found a new, unsaved Business::Company
# with that German name is built and counted in @created_companies.
# Returns the company.
def import_company(data)
  name = company_name(data[:company])
  known = Business::Company.find_by_name(name)
  return known unless known.nil?
  @created_companies += 1
  created = Business::Company.new
  created.name.de = name
  created
end
34
# Stores the package size from +data+ in the first part of +package+
# (a new Drugs::Part is added when the package has none) and saves it.
#
# data[:size] is split at 'x' into up to three factors which are read
# from the right: dose, size, multi. For the unit 'St' a missing third
# factor shifts the values one position (dose becomes the size, size
# becomes the multiplier); for every other unit the dose is stored as
# the part's quantity. Counts that are not greater than zero are stored
# as nil.
def import_size(data, package)
  part = package.parts.first || package.add_part(Drugs::Part.new)
  dose, size, multi = data[:size].to_s.split(/x/i, 3).reverse.compact
  unit_symbol = data[:unit].to_s
  if unit_symbol == 'St'
    multi, size = size, dose if multi.nil?
  else
    part.quantity = Drugs::Dose.new(dose, unit_symbol)
  end
  multi_count = multi.to_i
  size_count = size.to_i
  part.multi = multi_count > 0 ? multi_count : nil
  part.size = size_count > 0 ? size_count : nil
  unitname = data[:unitname]
  if unitname
    # reuse the unit of that name or create and save a new one
    unit = Drugs::Unit.find_by_name(unitname)
    unless unit
      unit = Drugs::Unit.new
      unit.name.de = unitname
      unit.save
    end
    part.unit = unit
  end
  part.save
end
59
# Returns an Array of the tables below +node+ in which the search
# 'h2/a' finds at least one element.
def interesting_tables node
  tables = node/'table'
  with_heading_link = tables.find_all do |candidate|
    !(candidate/'h2/a').empty?
  end
  with_heading_link.to_a
end
62
# Fetches the listing page "<host>/<fst><snd>.html" and every follow-up
# page, and returns the product data extracted from all of them.
# A follow-up page exists as long as the last 'pageResults' link of the
# current page carries the "next" label.
def get_alphabetical agent, fst, snd
  page = agent.get "#@host/#{fst}#{snd}.html"
  data = extract_data page
  # NOTE(review): the label literal looks mis-encoded - confirm against
  # the live page before touching it
  next_link = (page/'//a[@class="pageResults"]').last
  while next_link && next_link.inner_text == '[n?chste?>>]'
    page = agent.get next_link.attributes['href']
    data.concat extract_data(page)
    next_link = (page/'//a[@class="pageResults"]').last
  end
  data
end
73
# Extracts one Hash per product table of +page+.
#
# Every hash has :name and :url (taken from the table's 'h2/a' link) and,
# when the corresponding element is present, :price_public,
# :code_prescription, :size, :unit, :unitname and :company.
def extract_data page
  ## this should be page/'table[h2/a]'
  #  -> but Nokogiri apparently can't handle that
  candidates = interesting_tables page
  # matching tables that are found again inside another matching table
  # are not processed a second time
  nested = []
  candidates.each do |table|
    nested.concat interesting_tables(table)
  end
  data = []
  (candidates - nested).each do |table|
    link, = table/'h2/a'
    next unless link
    prod = {
      :name => utf8(link.inner_text),
      :url  => link.attribute('href').to_s,
    }
    price = (table/:strong).first
    if price
      # decimal comma -> decimal point
      prod.store :price_public, price.inner_text.tr(',', '.').to_f
    end
    ## should be (table/'td[text()="Abgabehinweis:"]').first
    #  -> but Nokogiri apparently can't handle that
    prescription = td_with_text(table, "Abgabehinweis:")
    if prescription
      td, = prescription.xpath('following-sibling::td')
      needs_prescription = !!/Rezeptpflichtig/.match(td.inner_text)
      prod.store :code_prescription, needs_prescription
    end
    ## should be (table/'td[text()="Packungsinhalt:"]').first
    #  -> but Nokogiri apparently can't handle that
    content = td_with_text(table, "Packungsinhalt:")
    if content
      td, = content.xpath('following-sibling::td')
      match = /\s*(.*)\s+(\S+)\s+(\S+)\s*$/.match(td.inner_text)
      if match
        size, unit, name = utf8(match[1]), utf8(match[2]), utf8(match[3])
        # only two words: they are size and unit, there is no unit name
        size, unit, name = unit, name, nil if size.empty?
        prod.update :size => size, :unit => unit, :unitname => name
      end
    end
    company = (table/'a[@class="liste"]').first
    prod.store :company, utf8(company.inner_text) if company
    data.push prod
  end
  data
end
122
# Returns the three summary lines (checked packages, updated packages,
# created companies) as an Array of Strings.
def report
  [
    ["Checked %5i Packages", @count],
    ["Updated %5i Packages", @found],
    ["Created %5i Companies", @created_companies],
  ].collect { |template, number| format(template, number) }
end
130
# Runs the shop's keyword search for +term+ and returns the extracted
# product data.
#
# A Zlib::GzipFile::Error restarts the request up to three times; after
# that it is re-raised. Every error that leaves this method has the
# requested URL appended to its message.
def search agent, term
  url = "#@host/advanced_search_result.php?keywords=#{term}"
  extract_data(agent.get(url))
rescue Zlib::GzipFile::Error => err
  # retries survives the retry because it is a local of this method
  retries = retries || 3
  unless retries > 0
    err.message << " after 3 retries - url: #{url}"
    raise err
  end
  retries -= 1
  retry
rescue StandardError => err
  err.message << " url: #{url}"
  raise err
end
147
# Returns the first 'td' below +table+ whose text, stripped of
# surrounding whitespace, equals +text+ - or nil when there is none.
def td_with_text table, text
  cells = table/'td'
  matching = cells.find_all { |cell| cell.text.strip == text }
  matching.first
end
153
# Refreshes +package+ from the shop's search result for its German
# :cid code.
#
# The package is looked up only when opts[:all] is set, when it has no
# public price, or when its public price originates from :pharma24 or
# :csv_product_infos - and only if it has a :cid code for 'DE'.
# Only the first search hit is used. From it the German name, the
# prescription code, the prices, the size and - if the product has no
# company yet - the company are taken over.
#
# agent   - HTTP agent handed on to search
# package - the package to update; it is saved after the update
# opts    - Hash, :all => true forces the lookup
def update_package agent, package, opts={}
  price = package.price(:public)
  resale = [ :pharma24,
             :csv_product_infos ].include?(package.data_origin(:price_public))
  needs_update = opts[:all] || price.nil? || resale
  cid = needs_update && package.code(:cid, 'DE')
  return unless cid
  @count += 1
  data, = search agent, cid.value
  return unless data
  @found += 1
  package.name.de = u(data[:name])
  presc = data[:code_prescription]
  if(prescription = package.code(:prescription))
    prescription.value = presc if prescription.value != presc
  else
    package.add_code Util::Code.new(:prescription, presc, 'DE')
  end
  amount = data[:price_public]
  # a search hit without a price element has no :price_public entry, so
  # amount may be nil - leave the prices alone in that case
  if(amount && amount > 0)
    update_price package, :public, amount
    if presc
      update_price package, :exfactory, package._price_exfactory
    end
  end
  import_size data, package
  package.save
  if((product = package.product) && product.company.nil?)
    product.company = import_company(data)
    product.save
  end
end
188
# Sets the 'DE' price of the given +type+ on +package+ to +amount+ and
# records :pharma24 as the origin of that price - but only when the
# price was created or actually changed.
def update_price package, type, amount
  dotype = :"price_#{type}"
  # if this price has been edited manually we won't overwrite
  # (recognised by an '@' in the recorded origin)
  origin = package.data_origin(dotype)
  return if origin && origin.to_s.include?('@')
  existing = package.price(type, 'DE')
  changed = if existing
              differs = existing != amount
              existing.amount = amount if differs
              differs
            else
              package.add_price(Util::Money.new(amount, type, 'DE'))
              true
            end
  package.data_origins.store dotype, :pharma24 if changed
end
209
+ end
210
+ end
211
+ end
@@ -0,0 +1,1186 @@
1
+ #!/usr/bin/env ruby
2
+ # Import::PharmNet -- de.oddb.org -- 15.10.2007 -- hwyss@ywesee.com
3
+
4
+ require 'fileutils'
5
+ require 'htmlentities'
6
+ require 'mechanize'
7
+ require 'oddb/import/importer'
8
+ require 'oddb/import/rtf'
9
+ require 'oddb/util/mail'
10
+ require 'pp'
11
+
12
+ module ODDB
13
+ module Import
14
+ module PharmNet
15
# Mechanize page parser that converts latin1 bodies to utf8 and replaces a
# fixed set of accented-vowel HTML entities before the page is parsed.
class EncodedParser < Mechanize::Page
  @@iconv = Iconv.new('utf8', 'latin1')
  ## HtmlEntities seems to kill the parser, so the entities are replaced
  #  manually for now (instead of HTMLEntities.new.decode(body))
  ENTITY_MAP = {
    '&aacute;' => 'á', '&agrave;' => 'à', '&auml;' => 'ä',
    '&eacute;' => 'é', '&egrave;' => 'è', '&euml;' => 'ë',
    '&iacute;' => 'í', '&igrave;' => 'ì', '&iuml;' => 'ï',
    '&oacute;' => 'ó', '&ograve;' => 'ò', '&ouml;' => 'ö',
    '&uacute;' => 'ú', '&ugrave;' => 'ù', '&uuml;' => 'ü',
  }.freeze
  def initialize(uri=nil, response=nil, body=nil, code=nil)
    # rewrite the declared charset, then convert the bytes themselves
    body = @@iconv.iconv(body.gsub(/iso-8859-1/i, 'utf-8'))
    ENTITY_MAP.each { |entity, char| body.gsub! entity, char }
    super(uri, response, body, code)
  end
end
40
# Delegator around an HTTP agent that can swap the wrapped agent for a
# brand-new instance of the same class (see #renew!).
class RenewableAgent < SimpleDelegator
  def initialize(agent)
    super
    renew!
  end
  # Replaces the wrapped agent with a new instance of its class, optionally
  # routed through a randomly picked entry of ODDB.config.http_proxies, and
  # installs EncodedParser as its html parser.
  def renew!
    fresh = __getobj__.class.new
    pool = ODDB.config.http_proxies
    host, port = pool.at(rand(pool.size))
    if host
      ODDB.logger.debug('PharmNet') {
        "Using proxy server #{host}:#{port}"
      }
      fresh.set_proxy host, port
    end
    fresh.pluggable_parser.html = EncodedParser
    __setobj__ fresh
  end
end
59
# Rtf parser that remembers the search term it was created for; subclasses
# may use @term when identifying chapters.
class TermedRtf < Rtf
  # term:: search term stored in @term
  def initialize(term)
    @term = term
  end
end
64
# Rtf parser for Fachinfo documents: recognizes chapter headings and opens
# the matching chapter in @document.
class FiParser < TermedRtf
  # Heading patterns and the chapter name each one selects. The list is
  # ordered: the first pattern that matches wins.
  # NOTE(review): the '17. Darreichungsformen' pattern is the only one
  # without the /i flag - confirm whether that is intentional.
  CHAPTER_PATTERNS = [
    [ /^1[08]\.?\s*Stand/i, 'date' ],
    [ /^14\.?\s*Sonstige\s+Hinweise/i, 'other_advice' ],
    [ /^(2|11)\.?\s*(Verschreibung|Verkauf)/i, 'sale_limitation' ],
    [ /^1\.?\s*Bezeichnung/i, 'name' ],
    [ /^[23]\.?\s*(Qualitative|Zusammensetzung)/i, 'composition' ],
    [ /^3\.?\s*Darreichung/i, 'galenic_form' ],
    [ /^3\.1\.?\s*Stoff/i, 'substance_group' ],
    [ /^3\.2\.?\s*(Arzneilich|Bestandteile)/i, 'active_agents' ],
    [ /^4(\.1)?\.?\s*Anwendung/i, 'indications' ],
    [ /^(10|4\.2)\.?\s*Dosierung/i, 'dosage' ],
    [ /^11\.?\s*Art\s+und\s+Dauer/i, 'application' ],
    [ /^(5|4\.3)\.?\s*Gegenanzeigen/i, 'counterindications' ],
    [ /^(8|4\.4)\.?\s*(Besondere\s+)?Warnhinweise/i, 'precautions' ],
    [ /^(7|4\.5)\.?\s*Wechselwirkungen/i, 'interactions' ],
    [ /^4\.6\.?\s*(Anwendung|Schwangerschaft)/i, 'pregnancy' ],
    [ /^4\.7\.?\s*Auswirkung/i, 'driving_ability' ],
    [ /^(6|4\.8)\.?\s*Nebenwirkungen/i, 'unwanted_effects' ],
    [ /^(12|4\.9)\.?\s*(Notfall|Überdosierung)/i, 'overdose' ],
    [ /^4\.?\s*Klinisch/i, 'clinical' ],
    [ /^5\.1\.?\s*Pharmakodynamisch/i, 'pharmacodynamics' ],
    [ /^13\.2\.?\s*Toxikologisch/i, 'toxicology' ],
    [ /^(13\.3|5\.2)\.?\s*Pharmakokineti(sch|k)/i, 'pharmacokinetics' ],
    [ /^13\.4\.?\s*Bioverfügbarkeit/i, 'bioavailability' ],
    [ /^5\.3\.?\s*Präklinisch/i, 'preclinicals' ],
    [ /^(13|5)\.?\s*Pharmakologisch/i, 'pharmacology' ],
    [ /^(3\.3|6\.1)\.?\s*(Liste|Hilfsstoffe?|Sonstige\s+Bestandteile)/i, 'excipients' ],
    [ /^(9|6\.2)\.?\s*(Wichtigste\s+)?Inkompatibilitäten/i, 'incompatibilities' ],
    [ /^(15|6\.3)\.?\s*(Dauer|Haltbarkeit)/i, 'shelf_life' ],
    [ /^(16|6\.4)\.?\s*(Besondere|Lagerung|Aufbewahrung)/i, 'storage' ],
    [ /^6\.5\.?\s*(Art|Behältnis)/i, 'packaging' ],
    [ /^17\.?\s*Darreichungsformen\s+und\sPackung/, 'packaging' ],
    [ /^6\.6\.?\s*(Besondere|Hinweis|Entsorgung)/i, 'disposal' ],
    [ /^6\.?\sPharmazeutisch/i, 'pharmaceutic' ],
    [ /^(19|7)\.?\s*(Name|Pharmazeutischer|Inhaber)/i, 'company' ],
    [ /^20\.?\s*(Name|Hersteller)/i, 'producer' ],
    [ /^8\.?\s*Zulassung/i, 'registration' ],
    [ /^9\.?\s*Datum/i, 'registration_date' ],
    [ /^zusätzliche Angaben/i, 'additional_information' ],
  ]
  # Adds a new chapter to @document when +buffer+ looks like a known heading
  # and the document has no chapter of that name yet, then defers to the
  # superclass.
  def identify_chapter(buffer)
    _pattern, heading = CHAPTER_PATTERNS.find { |pattern, _| pattern === buffer }
    if heading && !@document.chapter(heading)
      @document.add_chapter Text::Chapter.new(heading)
    end
    super
  end
  # Strips an optional leading marker (B or F followed by one or two
  # uppercase letters/digits) and leading whitespace from +value+, but only
  # while the current buffer is an empty Text::Paragraph.
  def _sanitize_text(value)
    return unless @buffer.empty? && @buffer.is_a?(Text::Paragraph)
    value.gsub!(/^([BF][A-Z0-9]{1,2})?\s*/, '')
  end
end
154
# Rtf parser for Patinfo (patient information) documents.
class PiParser < TermedRtf
  # Determines whether +buffer+ is a chapter heading and, if so, opens the
  # corresponding chapter in @document before deferring to the superclass.
  # Chapters other than 'composition' are only opened once a 'composition'
  # chapter exists.
  def identify_chapter(buffer)
    name = if /\b#@term\b/i.match(buffer)
             _heading_with_term(buffer)
           else
             _heading_without_term(buffer)
           end
    composition = @document.chapter('composition')
    if name && (name == 'composition' || composition)
      _open_chapter name, composition
    end
    super
  end
  # Chapter name for a heading that mentions the search term.
  def _heading_with_term(buffer)
    case buffer
    when /wof(ü|Ü|ue)r\s+(wird|werden)\s+(es|sie)\s+(angewendet|eingenommen)/i,
         /wird\s+angewendet$/i
      'indications'
    when /^(3\.?\s*)?Wie\s+(ist|sind).+?(anzuwenden|einzunehmen)\?/i
      'application'
    when /vor\s+der\s+(Anwendung|Einnahme)\s+von/i
      'precautions'
    when /^([56]\.?\s*)?Wie\s+(ist|sind).+?aufzubewahren/i
      'storage'
    when /^Bitte\s.+für\s+Kinder\s+nicht\s+erreichbar/i
      'personal'
    when /^([45]\.?\s*)?Welche\s+Nebenwirkungen/i, /^Nebenwirkungen:?$/i
      'unwanted_effects'
    when /Behandlungserfolg/i
      nil ## prevent composition if this is a dodgy match
    else
      'composition'
    end
  end
  # Chapter name for a heading that does not mention the search term, or nil.
  def _heading_without_term(buffer)
    case buffer
    when /^([45]\.?\s*)?Welche\s+Nebenwirkungen/i, /^Nebenwirkungen:?$/i
      'unwanted_effects'
    when /^(4\.?\s*)?Verhalten\s+im\s+Notfall/i
      'emergency'
    when /^(6\.?\s*)?(Weitere\s+)?(Informationen|Angaben)/i,
         /^(6\.?\s*)?Gebrauchsanleitung/i,
         /^Zusätzliche\s+Informationen/i
      'additional_information'
    when /^Anwendungsgebiete/i
      'indications'
    when /^Vorsichtsma(ss|ß)nahmen/i
      'precautions'
    when /^Dosierung\s*($|und)/i, /^Dosierungsanleitung/
      'application'
    when /Angaben\s+zur\s+Haltbarkeit/i
      'storage'
    when /^Gegenanzeigen/i
      'counterindications'
    when /^Darreichungsform/i
      'packaging'
    when /^(Hersteller.+)?Pharmazeutischer\s+Unternehmer/i,
         /^Pharmazeutischer\s+Hersteller/i
      'company'
    when /^\s*Stand\b/, /wurde\s+zuletzt\s+überarbeitet/i
      'date'
    when /^(Sehr\s+geehrte|Liebe)r?\s+Patient/i,
         /^Bitte\s.+für\s+Kinder\s+nicht\s+erreichbar/i
      'personal'
    end
  end
  # Ensures a chapter called +name+ is ready to receive text.
  def _open_chapter(name, composition)
    chapter = @document.chapter(name)
    if chapter.nil?
      @document.add_chapter Text::Chapter.new(name)
      return
    end
    pars = chapter.paragraphs.reject { |par| par.to_s.strip.empty? }
    if pars.size == 1 && /^\d+/.match(pars.first)
      ## some PI insert a document-overview after the composition, in which
      #  case we have an erroneous chapter, identified by only consisting of
      #  a heading. In that case the heading is moved into the composition
      #  and the chapter is started afresh:
      composition.append chapter
      @document.remove_chapter chapter
      @document.add_chapter Text::Chapter.new(name)
    end
  end
  # Resets the parser when +value+ marks unusable leading content, then
  # strips an optional leading marker (P followed by one or two uppercase
  # letters/digits) while the current buffer is an empty Text::Paragraph.
  def _sanitize_text(value)
    ## some rtfs have unusable information prior to the actual PI
    if /^PCX\b/ === value
      init
    elsif /Gebrauchsinformation/ === value
      init if /Recyclinglogo/.match(current_chapter.to_s)
    end
    return unless @buffer.empty? && @buffer.is_a?(Text::Paragraph)
    value.gsub!(/^([P][A-Z0-9]{1,2})?\b/, '')
    value.lstrip!
  end
end
242
+ class Importer < Importer
243
+ ERROR_EXPLANATIONS = {
244
+ "execution expired" => "the server stopped responding.",
245
+ "503 => Net::HTTPServiceUnavailable" => "the server is unavailable: http://en.wikipedia.org/wiki/HTTP_503#5xx_Server_Error",
246
+ "Invalid RTF-File: Text before rtf-version" => "the link pointed to a file that could not be parsed as RTF (probably a PDF)",
247
+ "Multiple assignment of Registration-Number" => <<-EOS,
248
+ there is already a Registration in the system with this Registration-Number.
249
+ The two Registrations should probably be merged manually.
250
+ EOS
251
+ }
252
+ attr_reader :errors
253
# Sets up caches, report counters and the on-disk archive locations below
# ODDB.config.var (creating the directories), then calls the superclass
# initializer.
def initialize
  # company-name noise removed before names are compared
  @stop = /(Pharma(ceuticals|zeutische\s*Fabrik)?|Arzneim(ittel|\.)|GmbH|[u&]\.?\s*Co\.?|Kg|Ltd\.?|')\s*/i
  @htmlentities = HTMLEntities.new

  # caches and bookkeeping
  @result_cache = {}
  @distance_cache = {}
  @sources = {}
  @errors = {}

  # counters used for the report
  @assigned = Hash.new 0
  @removed = Hash.new 0
  @not_removed = Hash.new 0
  @repaired = 0
  @reparsed_fis = 0
  @reparsed_pis = 0
  @products_created = 0
  @sequences_created = 0
  @packages_created = 0

  # archive locations
  var = ODDB.config.var
  @archive = File.join var, 'rtf', 'pharmnet'
  FileUtils.mkdir_p @archive
  @latest = File.join var, 'html', 'pharmnet', 'latest.html'
  FileUtils.mkdir_p File.dirname(@latest)
  super
end
275
# Imports the document of type +key+ (:fachinfo or :patinfo) referenced in
# +data+ and assigns it to +sequence+. Without a link in +data+, or when the
# imported document has 5 chapters or fewer, the call is handed over to
# remove_info instead. Errors are logged and collected in @errors.
def assign_info(key, agent, data, sequence, opts)
  url = data[key]
  return remove_info(key, sequence, opts) unless url

  sequence.send "#{key}_url=", "http://gripsdb.dimdi.de#{url}"
  term = data[:search_term]
  doc = import_rtf key, agent, url, term, opts
  doc.date = data[:"date_#{key}"]
  # arbitrary cutoff: documents with so few chapters can't be right...
  if doc.chapters.size <= 5
    ODDB.logger.debug('PharmNet') {
      sprintf("Discarding %s for %s (%s)", key, sequence_name(sequence), term)
    }
    remove_info key, sequence, opts
  else
    _assign_info key, doc, sequence, opts
  end
rescue Timeout::Error, StandardError => error
  sequence.save
  ODDB.logger.error('PharmNet') {
    sprintf("%s: %s", error.class, error.message) << "\n" << error.backtrace.join("\n")
  }
  # errors are grouped by the first 42 characters of their message
  bucket = (@errors[error.message[0,42]] ||= [])
  label = sequence ? sequence_name(sequence) : ''
  origin = error.backtrace.find { |ln| /pharmnet/.match ln }.to_s.strip
  bucket.push [ label, error.message, origin, url ]
end
299
# Stores +doc+ as the German version of the sequence's +key+ info and saves
# document, info and sequence. An info that already has content is only
# overwritten when opts[:replace] is set.
def _assign_info(key, doc, sequence, opts={})
  info = sequence.send(key)
  return if !info.empty? && !opts[:replace]

  ODDB.logger.debug('PharmNet') {
    sprintf("Assigning %s to %s", key, sequence_name(sequence))
  }
  info.de = doc
  @assigned[key] += 1
  doc.save
  info.save
  sequence.save
end
312
# Stores +registration+ as the EU registration code of +sequence+ unless it
# is nil or already set to that value.
#
# Raises a RuntimeError when the number is supposed to be unique but is
# already assigned to a different sequence.
def assign_registration(sequence, registration)
  return unless registration \
    && sequence.code(:registration, 'EU') != registration

  ODDB.logger.debug('PharmNet') {
    sprintf('Assigning Registration-Number %s to %s',
            registration, sequence_name(sequence))
  }
  if unique_registration? registration
    conflict = Drugs::Sequence.find_by_code(:value   => registration,
                                            :type    => 'registration',
                                            :country => 'EU')
    if conflict && conflict != sequence
      raise sprintf("Multiple assignment of Registration-Number %s (%s-%i/%s-%i)",
                    registration, sequence_name(sequence), sequence.odba_id,
                    conflict.name.de, conflict.odba_id)
    end
  end
  existing = sequence.code(:registration, 'EU')
  if existing
    existing.value = registration
  else
    sequence.add_code Util::Code.new(:registration, registration, 'EU')
  end
  sequence.save
end
336
# Picks the search result that fits +sequence+ best.
#
# The candidates returned by suitable_data are ranked by
# composition_relevance; among all candidates sharing the highest relevance
# the one with the most recent fachinfo (or else patinfo) date wins.
# Returns nil when there is no candidate with a relevance above 0.
#
# Every candidate's relevance is kept, so that ties really reach the date
# comparison (previously only the first of several equally relevant
# candidates was considered). Candidates without any date sort before dated
# ones instead of breaking the comparison.
def best_data(sequence, result)
  sname = sequence.name
  sname = sequence.product.name unless sname.de
  galform = sequence.galenic_forms.first
  company = sequence.company
  comparison = [
    sname,
    galform && galform.description,
    company && company.name,
  ].collect { |ml| ml ? ml.de : '' }
  suitable = suitable_data comparison, result,
                           :subcount => sequence.active_agents.size
  relevances = suitable.collect { |data|
    composition_relevance(sequence.active_agents, data)
  }
  best = relevances.max
  return unless best && best > 0

  contenders = []
  relevances.each_with_index { |rel, idx|
    contenders.push suitable.at(idx) if rel == best
  }
  contenders.sort_by { |data|
    date = data[:date_fachinfo] || data[:date_patinfo]
    [ date ? 1 : 0, date ]
  }.last
end
361
# Relevance of one active agent compared with one composition detail of a
# search result: the sum of a dose score and a substance-name score.
#
# The dose score is 1 when the agent has no dose or both doses are equal,
# otherwise the ratio of the smaller to the larger dose. The name score is
# the ngram_similarity of both substance names with 'hydrochlorid' removed.
def _composition_paired_relevance(agent, detail)
  adose = agent.dose.to_f
  ddose = detail[:dose].to_f
  drel = begin
    if adose == 0 || adose == ddose
      1
    else
      low, high = adose < ddose ? [adose, ddose] : [ddose, adose]
      low / high
    end
  rescue StandardError
    0
  end
  ignore = /hydrochlorid/
  names = [ agent.substance.name.de, detail[:substance] ].collect { |name|
    name.gsub(ignore, '')
  }
  drel + ngram_similarity(*names)
end
378
# Rates how well the active +agents+ of a sequence match the composition
# found in +data+ (data[:composition], a list of detail hashes).
#
# Every agent is rated against every detail; the one-to-one pairing with the
# highest total wins. The winning pairing is stored as data[:pairs] (index
# pairs [agent_index, detail_index]; empty when nothing matched at all), the
# averaged score as data[:relevance], which is also the return value.
#
# A missing composition is treated as empty, and when there is nothing to
# compare the relevance is 0.0 instead of a division by zero.
#
# NOTE: all pairings are enumerated, so the cost grows factorially with the
# larger of the two list sizes.
def composition_relevance(agents, data)
  details = data[:composition] || []
  participants = [agents.size, details.size].max
  if participants == 0
    data.store :pairs, []
    data.store :relevance, 0.0
    return 0.0
  end
  relevances = {}
  agents.each_with_index { |agent, aidx|
    details.each_with_index { |detail, didx|
      relevances.store [aidx, didx],
                       _composition_paired_relevance(agent, detail)
    }
  }
  max = 0
  best_pairs = []
  exclusive_permutation(participants).each { |pairs|
    # index pairs beyond either list have no entry and count as 0
    sum = pairs.inject(0) { |memo, pair|
      memo + relevances[pair].to_f
    }
    if sum > max
      best_pairs = pairs
      max = sum
    end
  }
  data.store :pairs, best_pairs
  data.store :relevance, max / participants
end
400
# Creates and saves a new Drugs::Sequence for +product+ with a single
# composition in +galform+, holding one active agent per entry of
# data[:composition]. The sequence is named after the leading non-numeric
# part of the product name in data[:data] plus the company name with the
# @stop words removed. Increments @sequences_created and returns the
# sequence.
def create_sequence(term, data, company, product, galform)
  pname, = data[:data]
  official = pname[/^[^\d(]+/].strip
  company_name = company.name.de.gsub(@stop, '').strip
  @sequences_created += 1

  sequence = Drugs::Sequence.new
  composition = Drugs::Composition.new
  composition.sequence = sequence
  composition.galenic_form = galform
  data[:composition].each { |act|
    agent = Drugs::ActiveAgent.new import_substance(act[:substance]), act[:dose]
    agent.composition = composition
    agent.save
  }
  composition.save

  sequence.name.de = [ official, company_name ].join(' ')
  sequence.marketable = data[:marketable]
  sequence.product = product
  sequence.save
  sequence
end
423
# Returns the pairings computed by _exclusive_permutation for the indices
# 0...participants on both sides.
def exclusive_permutation(participants)
  indices = 0.upto(participants - 1).to_a
  _exclusive_permutation(indices, indices.dup)
end
428
# Enumerates every one-to-one pairing between the elements of +left+ and
# +right+. Each pairing is a list of [left_element, right_element] pairs.
#
# The first element of +right+ is paired with each element of +left+ in
# turn and the remaining elements are paired recursively. This yields the
# same pairings in the same order as before, without recursing into
# combinations whose results were thrown away.
def _exclusive_permutation(left, right)
  return [[[left.first, right.first]]] if left.size == 1
  return [] if right.empty?

  second = right.first
  pass_right = right.reject { |val| val == second }
  result = []
  left.each { |first|
    pass_left = left.reject { |val| val == first }
    _exclusive_permutation(pass_left, pass_right).each { |rest|
      result.push [[first, second]].concat(rest)
    }
  }
  result
end
445
# Collects the details found on a product +page+ into a Hash: links and
# dates of the newest Fachinformation / Gebrauchsinformation, the
# composition table (:ask_nr, :substance, :dose per row), and the
# :registration and :marketable values. Without a bordered table only the
# link information is returned.
def extract_details(page)
  data = {}
  [ [:fachinfo, "Fachinformation"],
    [:patinfo, "Gebrauchsinformation"] ].each { |key, search|
    _extract_newest_link(data, key, search, page)
  }
  table = (page/"table[@border='1']").first
  return data unless table

  # the first row of the table is skipped
  rows = (table/"tr")[1..-1] || []
  data.store :composition, rows.collect { |row|
    ask_nr, substance, dose = (row/"span")[0, 3].collect { |span|
      _extract_details(span)
    }
    { :ask_nr => ask_nr, :substance => substance, :dose => parse_dose(dose) }
  }

  # a value span follows directly after the span holding its label
  label = ''
  (page/"span[@class='wbtxt']").each { |span|
    if /Reg\.?-Nr\.?/ === label
      data.store :registration, span.inner_text
    elsif /Verkehrsf/ === label
      data.store :marketable, span.inner_text.include?('ja')
    end
    label = span.inner_text
  }
  data
end
472
# Returns the inner html of +span+ with HTML entities decoded and tabs,
# newlines and non-breaking spaces removed.
def _extract_details(span)
  decoded = @htmlentities.decode(span.inner_html)
  decoded.gsub(/[\t\n]|\302\240/, '')
end
475
# Looks for links on +page+ whose text contains +search+ followed by a date
# in the form dd.mm.yyyy, and stores the newest one in +data+: the href
# under +key+ and the date under :"date_<key>".
#
# Links without a (valid) date in their text are ignored; +data+ is left
# untouched when no dated link is found. Returns +data+ after an update,
# nil otherwise.
def _extract_newest_link(data, key, search, page)
  dated = page.links.inject([]) { |memo, link|
    text = link.text.to_s
    if /#{search}\b/i.match(text) && (stamp = text[/(\d{2}\.){2}\d{4}/])
      day, month, year = stamp.split('.').collect { |num| num.to_i }
      begin
        memo.push [Date.new(year, month, day), link.href]
      rescue ArgumentError
        # not a calendar date (e.g. 31.02.2008): skip this link
      end
    end
    memo
  }
  newest = dated.max_by { |date, href| [date, href.to_s] }
  if newest
    data.update :"date_#{key}" => newest.first, key => newest.last
  end
end
488
# Extracts all result rows from the "titlesForm" of +page+ and from the
# further result pages it links to.
#
# Paging links are recognized by a link text such as "1", "11" or "21-30";
# the first of the sorted, unique hrefs is skipped and the remaining pages
# are fetched through +agent+.
def extract_result(agent, page)
  node = page.form("titlesForm").form_node
  result = _extract_result node
  paging = (node/"a").select { |link|
    /^\d*1(-\d+)?$/.match link.inner_text
  }
  hrefs = paging.collect { |link| link["href"] }.sort.uniq[1..-1]
  (hrefs || []).each { |href|
    following = agent.get href
    result.concat _extract_result(following.form("titlesForm").form_node)
  }
  result
end
505
# Turns the rows of a result table below +node+ into hashes with the title
# attributes of the row's spans (:data) and the href of its first named
# anchor (:href). The first two and the last three rows are skipped.
def _extract_result(node)
  rows = (node/"tr")[2..-4] || []
  rows.collect { |row|
    titles = (row/"td//span[@title]").collect { |span| span["title"] }
    anchor = (row/"a[@name]").first
    { :data => titles, :href => anchor["href"] }
  }
end
514
# Repairs active agents that have no dose (nil or a quantity of 0), using
# the pairing stored in data[:pairs] by composition_relevance.
#
# * If the agent's substance equals the paired detail's substance, only the
#   dose is taken over.
# * Otherwise, unless the agent already has a chemical equivalence, the
#   current substance/dose is kept as chemical equivalence and the agent is
#   switched to the substance and dose of the detail.
#
# @repaired is incremented for every changed agent. Pairs that point beyond
# either list (the lists may differ in size) and a missing pairing are
# skipped instead of raising.
def fix_composition(agents, data)
  details = data[:composition] || []
  (data[:pairs] || []).each { |aidx, didx|
    agent = agents[aidx]
    detail = details[didx]
    next unless agent && detail
    next unless agent.dose.nil? || agent.dose.qty == 0

    if agent.substance == detail[:substance]
      agent.dose = detail[:dose]
      agent.save
      @repaired += 1
    elsif !agent.chemical_equivalence
      agent.chemical_equivalence = Drugs::ActiveAgent.new agent.substance, agent.dose
      agent.chemical_equivalence.save
      agent.substance = import_substance detail[:substance]
      agent.dose = detail[:dose]
      agent.save
      @repaired += 1
    end
  }
end
536
# Submits the "titlesForm" of +page+ so that it requests the detail page of
# +result+: the form action and every form field named in the query of
# result[:href] are taken from that link. Returns the submitted page.
def get_details(agent, page, result)
  form = page.form("titlesForm")
  form.field("parinfo").value = 'true'
  form.field("docBaseName").value = form.field('baseName').value
  form.field("magicrequestid").value = rand.to_s

  target = URI.parse result[:href]
  form.action = target.path
  target.query.split('&').each { |param|
    name, value = param.split('=', 2)
    field = form.field(name)
    field.value = value if field
  }
  form.submit
end
551
# Opens a session on pharmnet-bund.de: submits the entry form, follows the
# "akzeptieren" link (but not "nicht akzeptieren") and returns the search
# form with its action set to the href of the link with id 'goME'.
def get_search_form(agent)
  index = "http://www.pharmnet-bund.de/dynamic/de/am-info-system/index.html"
  terms_page = agent.get(index).form("pharmnet_amis_off_ppv").submit
  accept = terms_page.links.find { |l| /(?<!nicht )akzeptieren/i.match l.text }
  search_page = accept.click
  form = search_page.form("search_form")
  target = search_page.links.find { |l| l.attributes["id"] == 'goME' }
  form.action = target.href
  form
end
563
# Searches for +term+ and returns the list of result hashes, each tagged
# with the :search_term that produced it.
#
# While nothing is found the search is repeated with whitespace replaced by
# dashes and then with the last word of the term dropped; nil is returned
# once the term is shorter than 3 characters.
#
# On a Timeout::Error or a "ServerError" the method waits
# (opts[:retry_unit] seconds, times 4 for every retry already used), renews
# the agent and the session and tries again, up to opts[:retries] times.
# Any other error, or running out of retries, is recorded in @errors and
# nil is returned.
def get_search_result(agent, term, sequence=nil, opts={})
  opts = { :info_unrestricted => false,
           :repair => false, :retries => 3,
           :retry_unit => 60 }.merge opts
  used_term = nil
  term = term.dup
  ODDB.logger.debug('PharmNet') { sprintf('Searching for %s', term) }
  hits = []
  while hits.empty?
    return if term.length < 3
    used_term = term.dup
    hits.concat search(agent, term, sequence, opts)
    if hits.empty?
      used_term = term.gsub(/\s+/, '-')
      hits.concat search(agent, used_term, sequence, opts)
    end
    # drop the last word for the next attempt
    term.gsub!(/\s*[^\s]+$/, '')
  end
  hits.each { |data| data.store(:search_term, used_term) }
rescue Timeout::Error, StandardError => error
  ODDB.logger.error('PharmNet') {
    sprintf("%s: %s", error.class, error.message) << "\n" << error.backtrace.join("\n")
  }
  # retries survives the retry below, it is a method-level local
  retries ||= opts[:retries]
  transient = error.is_a?(Timeout::Error) || /ServerError/.match(error.message)
  unless transient && retries > 0
    origin = error.backtrace.find { |ln| /pharmnet/.match ln }.to_s.strip
    (@errors[error.message[0,42]] ||= []).push [
      sequence ? sequence_name(sequence) : '', error.message, origin ]
    return nil
  end
  seconds = opts[:retry_unit] * 4 ** (opts[:retries] - retries)
  ODDB.logger.debug('PharmNet') {
    sprintf("Waiting %i seconds for the server to recover...", seconds)
  }
  sleep seconds
  retries -= 1
  ODDB.logger.debug('PharmNet') {
    "Renewing Mechanize-agent and starting a new Session" }
  agent.renew!
  @search_form = nil
  retry
end
607
# Searches for +term+ and returns the single result, or the best of several
# results as chosen by best_data. Returns nil when the search yields nothing.
def identify_details(agent, term, sequence=nil,
                     opts = { :info_unrestricted => false,
                              :repair => false, :retries => 3})
  result = get_search_result(agent, term, sequence, opts)
  return unless result
  result.size == 1 ? result.first : best_data(sequence, result)
end
618
# Finds the product of +company+ that matches the search result +data+, or
# creates one.
#
# Candidate names are tried in this order: official name with company,
# official name, search term with company, search term. The official name
# is the leading part of the product name in data[:data] up to the first
# digit or parenthesis; when the product name has no such part, those two
# candidates are skipped. For each candidate an exact name match is
# preferred over a search match; either must belong to +company+.
#
# When nothing is found a new product named "<term> <company>" is created
# and @products_created is incremented. Always returns the product itself,
# independent of what #save returns.
def identify_product(term, data, company)
  pname, = data[:data]
  official = pname.to_s[/^[^\d(]+/].to_s.strip
  company_name = company.name.de.gsub(@stop, '').strip
  term_with_company = [ term, company_name ].join(' ')
  candidates = [ term_with_company, term ]
  unless official.empty?
    candidates.unshift [ official, company_name ].join(' '), official
  end
  candidates.each do |cnd|
    exact = Drugs::Product.find_by_name(cnd)
    return exact if exact && exact.company == company
    Drugs::Product.search_by_name(cnd).each do |candidate|
      return candidate if candidate.company == company
    end
  end
  ## if we can't find a product, we'll have to create a new one.
  @products_created += 1
  product = Drugs::Product.new
  product.name.de = term_with_company
  product.company = company
  product.save
  product
end
643
# Returns the sequence of +product+ that has exactly one composition, the
# same (sorted, non-nil) doses as data[:composition] and +galform+ as its
# only galenic form. Returns nil without a product or a matching sequence.
def identify_sequence(data, product, galform)
  return unless product
  wanted = data[:composition].collect { |act| act[:dose] }.compact.sort
  product.sequences.find { |seq|
    seq.compositions.size == 1 \
      && seq.doses.compact.sort == wanted \
      && seq.galenic_forms == [galform]
  }
end
653
# Runs _import for +sequences+ and mails the resulting report to the
# admins, using the current time and the importer class as subject.
def import(agent, sequences, opts = { :replace         => false,
                                      :reload          => false,
                                      :remove          => false,
                                      :repair          => false,
                                      :reparse         => false,
                                      :reparse_patinfo => false,
                                      :retries         => 3,
                                      :retry_unit      => 60 })
  subject = sprintf("%s: %s", Time.now.strftime('%c'), self.class)
  Util::Mail.notify_admins subject, _import(agent, sequences, opts)
end
664
# Processes all +sequences+ that have a name, in name order, and returns the
# report.
#
# With opts[:resume] only sequences whose downcased name sorts at or after
# the given value are processed. ODBA errors raised for a single sequence
# are skipped deliberately, so that one broken record does not abort the
# run. An empty list of sequences no longer raises; the report is produced
# right away.
def _import(agent, sequences, opts = { :replace         => false,
                                       :reload          => false,
                                       :remove          => false,
                                       :repair          => false,
                                       :reparse         => false,
                                       :reparse_patinfo => false,
                                       :retries         => 3,
                                       :retry_unit      => 60 })
  agent = RenewableAgent.new agent
  if resume = opts[:resume]
    resume = resume.to_s.downcase
    sequences = sequences.select { |sequence|
      (name = sequence_name(sequence)) && name.downcase >= resume
    }
  else
    sequences = sequences.select { |sequence|
      sequence_name(sequence)
    }
  end
  sequences = sequences.sort_by { |sequence|
    sequence_name(sequence)
  }
  count = 0
  first = sequences.first
  head = first && first.name
  @checked = "Checked 0 Sequences"
  ## let odba cache release unneeded sequences ...
  sequences.collect! { |sequence| sequence.odba_id }
  while odba_id = sequences.shift
    begin
      ## ... and refetch them when necessary
      sequence = ODBA.cache.fetch(odba_id)
      count += 1
      @checked = sprintf "Checked %i Sequences from '%s' to '%s'",
                         count, head, sequence_name(sequence)
      process(agent, sequence, opts)
    rescue ODBA::OdbaError
      # best effort: skip sequences that cannot be loaded or stored
    end
  end
  report
end
704
# Finds the company called +name+, or creates it.
#
# The name is first looked up with the @stop words removed. While nothing
# is found, companies matching the (progressively shortened) term are
# accepted if their cleaned name has an ngram_similarity above 0.8 to the
# cleaned +name+. A found company gets +name+ added as synonym; otherwise a
# new company with that name is created. The company is saved and returned.
def import_company(name)
  cleaned = name.gsub(@stop, '').strip
  term = cleaned
  company = Business::Company.find_by_name(term)
  while company.nil? && !term.empty?
    company = Business::Company.search_by_name(term).find { |candidate|
      ngram_similarity(cleaned, candidate.name.de.gsub(@stop, '')) > 0.8
    }
    # drop the last word for the next attempt
    term = term.gsub(/(^|\s)+\S+\s*$/, '')
  end
  if company
    company.name.add_synonym name
  else
    company = Business::Company.new
    company.name.de = name
  end
  company.save
  company
end
723
# Finds the galenic form described by +description+, or creates it.
#
# An exact match is returned as is. Otherwise the first search hit whose
# German description has an ngram_similarity above 0.75 gets +description+
# added as synonym and is saved. If nothing fits, a new galenic form is
# created and saved.
def import_galenic_form(description)
  exact = Drugs::GalenicForm.find_by_description(description)
  return exact if exact

  similar = Drugs::GalenicForm.search_by_description(description).find { |gf|
    ngram_similarity(description, gf.description.de) > 0.75
  }
  if similar
    similar.description.add_synonym description
    similar.save
    return similar
  end

  created = Drugs::GalenicForm.new
  created.description.de = description
  created.save
  created
end
742
# Searches for +term+ and makes sure that every result has a sequence with
# registration, Fachinfo, Patinfo and package assigned; missing companies,
# products, galenic forms and sequences are created. Returns the report.
#
# A sequence is looked up by its registration number where that is unique,
# otherwise through product, doses and galenic form. With opts[:repair] an
# existing sequence gets its name, marketable flag and composition
# refreshed from the search result. That branch no longer fails for a
# sequence without product, or for a product name that starts with a digit
# or parenthesis.
def import_missing(agent, term, opts={})
  @checked = "Searched for FIs/GIs for '#{term}'"
  opts = { :skip_totals => true }.merge opts
  agent = RenewableAgent.new agent
  if result = get_search_result(agent, term, nil, opts)
    result.each do |data|
      company, product, galform = nil
      sequence = nil
      registration = data[:registration]
      if registration && unique_registration?(registration)
        sequence = Drugs::Sequence.find_by_code :value => registration
      end
      unless sequence
        pname, gfname, cname = data[:data]
        galform = import_galenic_form gfname
        company = import_company cname
        product = identify_product term, data, company
        sequence = identify_sequence data, product, galform
      end
      if sequence
        if opts[:repair]
          pname, gfname, cname = data[:data]
          if product = sequence.product
            product.company ||= import_company cname
          end
          # without a product, fall back to the company name of the result
          owner_name = product ? product.company.name.de : cname.to_s
          company_name = owner_name.gsub(@stop, '').strip
          official = pname.to_s[/^[^\d(]+/].to_s.strip
          sequence.marketable = data[:marketable]
          sequence.name.de = [ official, company_name ].reject { |part|
            part.empty?
          }.join(' ')
          agents = sequence.active_agents
          # called for its side effect: stores data[:pairs] for the repair
          composition_relevance agents, data
          fix_composition agents, data
        end
      else
        sequence = create_sequence term, data, company, product, galform
      end
      assign_registration sequence, data[:registration]
      assign_info(:fachinfo, agent, data, sequence, opts)
      assign_info(:patinfo, agent, data, sequence, opts)
      import_package sequence, data, opts
    end
  end
  report opts
end
786
# Returns the package of +sequence+ that matches the package size encoded
# in the product name of +data+ ("<name> - OP<size>" or
# "<name> - OP(<multi>x<size>)", optionally followed by "(<unit>)"),
# creating and saving it when the sequence has no package of that size.
# Returns nil when the product name carries no such size information.
#
# NOTE(review): the <multi> and <unit> parts of the name are parsed but not
# used - confirm whether they should influence the size.
def import_package(sequence, data, opts={})
  pname, gfname, _ = data[:data]
  match = /^(?<name>.*?)\s*-\s*OP((?<size>\d+)|\((?<multi>\d+)x(?<size>\d+)\))(\((?<unit>[^)]+)\))?$/i.match(pname)
  return unless match

  size = match[:size].to_i
  existing = sequence.packages.find { |pac| pac.size == size }
  return existing unless existing.nil?

  @packages_created += 1
  package = Drugs::Package.new
  package.add_code Util::Code.new(:cid, "oddb#{package.uid}", 'DE')
  package.name.de = match[:name]

  part = Drugs::Part.new
  part.size = size
  part.unit = import_unit gfname
  part.package = package
  part.composition = sequence.compositions.first
  part.save

  package.sequence = sequence
  package.save
  package
end
811
# Returns the Text::Document for the RTF file at +url+, parsing it when the
# document is not known yet, or when opts[:reparse] is set and the url has
# not been parsed during this run.
#
# The file is downloaded through +agent+ and archived below @archive unless
# an archived copy exists (opts[:reload] forces a download). Parsing uses
# FiParser for :fachinfo and PiParser for :patinfo and is repeated with a
# progressively shortened term until at least 4 chapters are found or the
# term is used up. The input stream is closed once parsing is done, also
# when the parser raises.
def import_rtf(key, agent, url, term, opts = { :reparse => false,
                                               :reload  => false})
  pklass = case key
           when :fachinfo
             FiParser
           when :patinfo
             PiParser
           end
  path = File.join @archive, File.basename(url)
  doc = Text::Document.find_by_source(url)
  ODDB.logger.debug('PharmNet') {
    sprintf('Comparing %s-sources for %s', key, term) }
  if(doc.nil? || (opts[:reparse] && !@sources[url]))
    @sources.store url, true
    io = nil
    chapters = []
    parsed = nil
    begin
      if(opts[:reload] || !File.exist?(path))
        uri = URI.parse url
        uri.scheme = 'http'
        if uri.host.to_s.empty?
          uri.host = 'gripsdb.dimdi.de'
        end
        ODDB.logger.debug('PharmNet') {
          sprintf('Downloading %s for %s from %s', key, term, uri.to_s) }
        file = agent.get uri.to_s
        file.save path
        ODDB.logger.debug('PharmNet') {
          sprintf('Saving %s for %s in %s', key, term, path) }
        io = StringIO.new(file.body)
      else
        ODDB.logger.debug('PharmNet') {
          sprintf('Reading %s for %s from %s', key, term, path) }
        io = File.open(path)
      end
      # the term ends up in a regular expression: '.' matches any separator
      term = term.downcase.gsub(/[\s-]/, '.')
      while !term.empty? && chapters.size < 4
        ODDB.logger.debug('PharmNet') {
          sprintf('Parsing %s with term: %s', key, term) }
        io.rewind
        parsed = pklass.new(term).import io
        chapters = parsed.chapters
        # drop the last part of the term for the next attempt
        term = term.gsub(/(\A|\.)[^.]*$/, '')
      end
    ensure
      io.close if io && !io.closed?
    end
    ## ensure that chapter-headings are bold
    parsed.chapters.each { |chapter|
      if((paragraph = chapter.paragraphs.first) \
         && (format = paragraph.formats.first))
        format.augment "b"
      end
    }
    parsed.source = url
    if doc
      doc.chapters.replace chapters
      doc.save
    else
      doc = parsed
    end
  end
  doc
end
872
# Returns the substance called +name+, creating and saving a new one with
# that German name when none exists.
def import_substance(name)
  existing = Drugs::Substance.find_by_name name
  return existing if existing

  created = Drugs::Substance.new
  created.name.de = name
  created.save
  created
end
881
# Finds the unit called +name+, or creates it.
#
# An exact match is returned as is. Otherwise the first search hit whose
# German name has an ngram_similarity above 0.75 gets +name+ added as
# synonym and is saved. If nothing fits, a new unit is created and saved.
def import_unit(name)
  exact = Drugs::Unit.find_by_name name
  return exact if exact

  similar = Drugs::Unit.search_by_name(name).find { |unt|
    ngram_similarity(name, unt.name.de) > 0.75
  }
  if similar
    similar.name.add_synonym name
    similar.save
    return similar
  end

  created = Drugs::Unit.new
  created.name.de = name
  created.save
  created
end
900
# Share (0.0 to 1.0) of the +n+-character windows of the longer string that
# also occur in the shorter one. Both strings are passed through u(),
# downcased and stripped of whitespace, commas, dots, dashes and slashes
# before they are compared.
def ngram_similarity(str1, str2, n=5)
  longer, shorter = [str1, str2].collect { |str|
    u(str).downcase.gsub(/[\s,.\-\/]+/, '')
  }
  longer, shorter = shorter, longer if longer.length < shorter.length
  # a string shorter than n still counts as one window
  windows = [ longer.length - n, 0 ].max + 1
  hits = (0...windows).count { |idx| shorter.include? longer[idx, n] }
  hits.to_f / windows
end
915
# Builds a Drugs::Dose from a string such as "12.5 mg": the leading decimal
# number becomes the quantity, the trailing run of characters that are
# neither digits nor dots becomes the unit.
#
# NOTE(review): the quantity pattern requires a literal dot, so a string
# like "100 mg" yields a quantity of 0.0 - confirm against the format the
# site delivers.
def parse_dose(str)
  quantity = str[/^\d*\.\d*/].to_f
  unit = str[/[^\d\.]+$/]
  Drugs::Dose.new(quantity, unit)
end
918
# Handles a single +sequence+ of an import run.
#
# In reparse mode only the stored Patinfo (opts[:reparse_patinfo]) or
# Fachinfo (opts[:reparse]) is parsed again. Otherwise, if an info is
# missing or opts[:replace] / opts[:remove] is set, the sequence is
# searched for; the infos are removed when nothing is found or the
# composition relevance is 1.25 or lower, and assigned otherwise. The
# registration number is only assigned for a relevance of at least 2.
# Errors are logged and collected in @errors.
def process(agent, sequence, opts = { :replace         => false,
                                      :reload          => false,
                                      :remove          => false,
                                      :repair          => false,
                                      :reparse         => false,
                                      :reparse_patinfo => false,
                                      :retries         => 3,
                                      :retry_unit      => 60 })
  if opts[:reparse_patinfo]
    return reparse_patinfo(agent, sequence)
  elsif opts[:reparse]
    return reparse_fachinfo(agent, sequence)
  end

  wanted = sequence.fachinfo.empty? || sequence.patinfo.empty? \
    || opts[:replace] || opts[:remove]
  return unless wanted

  data = identify_details(agent, sequence_name(sequence), sequence, opts)
  return remove_infos(sequence, opts) unless data

  relevance = composition_relevance(sequence.active_agents, data)
  return remove_infos(sequence, opts) if relevance <= 1.25 # arbitrary value

  [:fachinfo, :patinfo].each { |key|
    assign_info(key, agent, data, sequence, opts)
  }
  fix_composition sequence.active_agents, data if opts[:repair]

  # assign registration number if really good match
  return if relevance < 2 # arbitrary value
  assign_registration sequence, data[:registration]
rescue Timeout::Error, StandardError => error
  ODDB.logger.error('PharmNet') {
    sprintf("%s: %s", error.class, error.message) << "\n" << error.backtrace.join("\n")
  }
  bucket = (@errors[error.message[0,42]] ||= [])
  origin = error.backtrace.find { |ln| /pharmnet/.match ln }.to_s.strip
  bucket.push [ sequence_name(sequence), error.message, origin ]
end
953
# Removes the German document of type +key+ (:fachinfo or :patinfo) from
# +sequence+ when opts[:remove] is set, counting it in @removed. Without
# opts[:remove] an existing document is kept and counted in @not_removed.
#
# The log message names the kind of document that is removed (it used to
# say "Fachinfo" for Patinfos as well).
def remove_info(key, sequence, opts)
  info = sequence.send(key)
  if opts[:remove] && info.de
    @removed[key] += 1
    ODDB.logger.debug('PharmNet') {
      sprintf('Removing %s from %s', key.to_s.capitalize,
              sequence_name(sequence))
    }
    info.de = nil
    sequence.save
  elsif info.de
    @not_removed[key] += 1
  end
end
966
# Runs remove_info for the fachinfo and then for the patinfo of +sequence+,
# returning the result of the patinfo call.
def remove_infos(sequence, opts)
  [:fachinfo, :patinfo].collect { |key| remove_info(key, sequence, opts) }.last
end
970
# Re-imports the German fachinfo of +sequence+ from its stored source.
#
# Does nothing (and returns nil) when there is no German fachinfo, when it
# has no source, or when import_rtf yields no document. Otherwise the
# chapters of the existing info are replaced with the freshly parsed ones,
# @reparsed_fis is incremented and the info is saved.
def reparse_fachinfo(agent, sequence)
  info = sequence.fachinfo.de
  return unless info
  source = info.source
  return unless source
  doc = import_rtf(:fachinfo, agent, source, sequence_name(sequence),
                   :reparse => true)
  return unless doc
  @reparsed_fis += 1
  info.chapters.replace doc.chapters
  info.save
end
979
# Re-imports the German patinfo of +sequence+ from its stored source.
#
# Does nothing (and returns nil) when there is no German patinfo, when it
# has no source, or when import_rtf yields no document. Otherwise the
# chapters of the existing info are replaced with the freshly parsed ones,
# @reparsed_pis is incremented and the info is saved.
def reparse_patinfo(agent, sequence)
  info = sequence.patinfo.de
  return unless info
  source = info.source
  return unless source
  doc = import_rtf(:patinfo, agent, source, sequence_name(sequence),
                   :reparse => true)
  return unless doc
  @reparsed_pis += 1
  info.chapters.replace doc.chapters
  info.save
end
988
# Assembles the import report as an array of lines.
#
# opts - option hash; with :skip_totals set, the pass over all sequences
#        and the two "Total:" lines are omitted.
#
# The summary starts with @checked and the counters collected during the
# run, followed by one " - <count> x <key>" line per error group. After
# that comes a detail section per error group: a heading, the matching
# ERROR_EXPLANATIONS entry if there is one, and one line per recorded
# instance.
def report(opts = {})
  skip_totals = opts[:skip_totals]
  fi_sources = {}
  pi_sources = {}
  fi_count = 0
  pi_count = 0
  unless skip_totals
    Drugs::Sequence.all do |sequence|
      fachinfo = sequence.fachinfo.de
      if fachinfo
        fi_count += 1
        fi_sources[fachinfo.source] = true
      end
      patinfo = sequence.patinfo.de
      if patinfo
        pi_count += 1
        pi_sources[patinfo.source] = true
      end
    end
  end

  # the totals stay nil when skipped and are dropped by compact below
  fi_total = nil
  pi_total = nil
  unless skip_totals
    fi_total = "Total: #{fi_sources.size} Fachinfos linked to #{fi_count} Sequences"
    pi_total = "Total: #{pi_sources.size} Patinfos linked to #{pi_count} Sequences"
  end
  error_count = @errors.values.inject(0) { |sum, errs| sum + errs.size }

  lines = [
    @checked,
    "",
    "Assigned #{@assigned[:fachinfo]} Fachinfos",
    "Removed #{@removed[:fachinfo]} Fachinfos",
    "Kept #{@not_removed[:fachinfo]} unconfirmed Fachinfos",
    fi_total,
    "",
    "Assigned #{@assigned[:patinfo]} Patinfos",
    "Removed #{@removed[:patinfo]} Patinfos",
    "Kept #{@not_removed[:patinfo]} unconfirmed Patinfos",
    pi_total,
    "",
    "Created #{@products_created} Products",
    "Created #{@sequences_created} Sequences",
    "Created #{@packages_created} Packages",
    "",
    "Reparsed #{@reparsed_fis} Fachinfos",
    "Reparsed #{@reparsed_pis} Patinfos",
    "Repaired #{@repaired} Active Agents",
    "",
    "Errors: #{error_count}",
  ].compact

  details = []
  @errors.sort.each do |key, instances|
    heading = "#{instances.size} x #{key}"
    lines.push " - #{heading}"
    details.push "", "#{heading}:"
    explanation = ERROR_EXPLANATIONS[key]
    details.push "This means that #{explanation}" if explanation
    details.push ''
    instances.each do |name, message, line, link|
      details.push sprintf("%s: %s (%s) -> http://gripsdb.dimdi.de%s",
                           name, message, line, link)
    end
  end
  lines.concat details
end
1044
# Fills the 'term' field of +form+ with +term+ and submits the form,
# returning whatever the submission returns.
def result_page(form, term)
  field = form.field('term')
  field.value = term
  form.submit
end
1048
# Searches for +term+ and returns the detail hashes of all results.
#
# agent    - used to fetch the search form and pages; must respond to
#            transact and renew!
# term     - the search term; it is downcased before use
# sequence - optional; its active agents are inspected when opts[:repair]
# opts     - option hash; :info_unrestricted and :repair are read here
#
# Results are cached in @result_cache under the downcased term. On a cache
# miss, cached keys that sort before the first three characters of the term
# are dropped first. If the result page does not belong to the term, the
# agent is renewed and the search repeated once; if the page is still
# wrong, an empty array is returned without being cached.
def search(agent, term, sequence = nil, opts = {})
  term = term.downcase
  @result_cache.fetch(term) do
    prefix = term[0,3]
    @result_cache.delete_if { |cached, _| cached < prefix } if prefix
    @search_form ||= get_search_form(agent)
    ## if we need to repair the active agents, we want all results, otherwise
    #  only those that have a Fach- or PatInfo to parse.
    unrestricted = opts[:info_unrestricted] ||
      (opts[:repair] && sequence &&
       sequence.active_agents.any? { |act| act.dose.qty == 0 })
    fi_only = unrestricted ? 'NO_RESTRICTION' : 'YES'
    set_fi_only(@search_form, fi_only)
    details = agent.transact do
      page = result_page(@search_form, term)
      if (found = _search_invalid?(page, term))
        ODDB.logger.error('PharmNet') do
          sprintf "Searched for '%s' but got result for '%s' - creating new session",
                  term, found
        end
        agent.renew!
        @search_form = get_search_form(agent)
        set_fi_only(@search_form, fi_only)
        page = result_page(@search_form, term)
        # give up; this leaves the method without storing anything
        return [] if _search_invalid?(page, term)
      end
      page.save @latest
      extract_result(agent, page).collect do |data|
        detail_page = get_details(agent, page, data)
        detail = data.merge(extract_details(detail_page))
        detail.delete :href
        detail
      end
    end
    @result_cache.store term, details
  end
end
1090
# Checks whether +page+ shows the result for +term+.
#
# Looks at the last "div.wbsectionsubtitlebar" element of the page. Returns
# nil when its text contains "Arzneimittelname: <term>?" (compared
# case-insensitively), an empty string when there is no such element, and
# otherwise the "Arzneimittelname:..." part of the text up to the first
# question mark. Both non-nil results are truthy.
def _search_invalid?(page, term)
  title = (page/"div.wbsectionsubtitlebar").last
  return '' if title.nil?
  expected = /Arzneimittelname:\s#{Regexp.escape(term)}\?/i
  title.inner_text[/Arzneimittelname:[^?]+/] unless expected.match(title.inner_text)
end
1098
# Returns the German name of +sequence+, falling back to the German name of
# its product. Yields nil when +sequence+ is nil, or when it has neither a
# German name nor a product.
def sequence_name(sequence)
  return unless sequence
  own = sequence.name.de
  return own if own
  product = sequence.product
  product.name.de if product
end
1107
# Checks every radio button of +form+ that is named "WFTYP" and whose value
# equals +status+ (default "YES").
def set_fi_only(form, status = "YES")
  form.radiobuttons.each do |button|
    next unless button.name == "WFTYP"
    button.check if button.value == status
  end
end
1114
# Picks the best-matching entries of +selection+ for +comparison+.
#
# Every entry is scored with _suitable_data; entries for which that returns
# nothing are discarded. The score of an entry is the sum of the returned
# values. All entries sharing the highest score are returned, in their
# original order; the result is empty when nothing qualified.
def suitable_data(comparison, selection, opts = {})
  ODDB.logger.debug('PharmNet') {
    "Checking for suitable data in #{selection.size} results"
  }
  best = 0
  scored = []
  selection.each do |data|
    dists = _suitable_data(data, comparison, opts)
    next unless dists
    total = dists.inject { |a, b| a + b }
    best = total if total > best
    scored.push [total, data]
  end
  ODDB.logger.debug('PharmNet') {
    "Found a preselection of #{scored.size} results"
  }
  result = []
  scored.each do |total, data|
    result.push data if total == best
  end
  ODDB.logger.debug('PharmNet') {
    "Returning the best #{result.size} results"
  }
  result
end
1143
# Scores one search result against a comparison.
#
# data       - result hash; data[:data] is the list of strings to compare
#              and data[:composition] is consulted when opts[:subcount]
#              is set
# comparison - list of strings to compare against, position by position;
#              it is not modified
# opts       - option hash. :cutoff (defaulted to 0.25 *in the passed
#              hash*) is the minimum similarity per position, :keep_dose
#              suppresses the dose stripping, :subcount is the expected
#              number of composition entries.
#
# Returns the list of per-position similarities, or nil as soon as one
# position falls below the cutoff. With :subcount, nil is also returned
# when the composition size differs from it; otherwise a 0 is appended.
def _suitable_data(data, comparison, opts)
  opts[:cutoff] ||= 0.25
  raw = data[:data].dup
  comp = comparison.dup

  unless(opts[:keep_dose])
    # strip the words of the second entry and anything that looks like a
    # dose ("500 mg", "25 mg / 24 h") from both first entries
    part = Regexp.escape(raw[1].to_s).gsub('\ ', ')|(')
    ptrn = /(#{part})|(\b\d+\s*m?g(\s*\/\s*\d+\s*h)?)[\-\s]*/i
    raw[0] = raw[0].gsub(ptrn, '')
    comp[0] = comp[0].gsub(ptrn, '')
  end

  # cut the second entry back to the word stem in front of "tab..."
  tabl = /([a-z]{4,})tab.*/i
  raw[1] = raw[1].to_s.gsub(tabl, '\1')
  # Import::Csv::ProductInfos passes a comparison without Galenic Form if
  # no suitable data is found on the first try
  if comp[1]
    comp[1] = comp[1].to_s.gsub(tabl, '\1')
  end

  dists = []
  raw.each_with_index do |str, idx|
    str = str.to_s
    # compare against the normalised copy; reading the untouched
    # +comparison+ here made the normalisation above ineffective
    othr = comp[idx]
    other = othr ? othr.to_s : str

    relevance = ngram_similarity str.gsub(@stop, ''), other.gsub(@stop, '')
    return if relevance < opts[:cutoff]
    dists.push relevance
  end

  if(subcount = opts[:subcount])
    composition = data[:composition]
    cdist = composition ? (subcount - composition.size).abs : subcount
    dists.push(cdist) unless cdist > 0
  else
    dists
  end
end
1180
# True unless the string form of +code+ has a line starting with "EU".
def unique_registration?(code)
  code.to_s !~ /^EU/
end
1183
+ end
1184
+ end
1185
+ end
1186
+ end