de.oddb 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (431) hide show
  1. data/Guide.txt +3 -0
  2. data/History.txt +5 -0
  3. data/LICENCE.txt +339 -0
  4. data/Manifest.txt +430 -0
  5. data/README +423 -0
  6. data/README.txt +25 -0
  7. data/Rakefile +28 -0
  8. data/bin/admin +71 -0
  9. data/bin/exportd +44 -0
  10. data/bin/oddbd +33 -0
  11. data/data/fulltext/data/dicts/french/fulltext.aff +1057 -0
  12. data/data/fulltext/data/dicts/french/fulltext.dict +91189 -0
  13. data/data/fulltext/data/dicts/french/fulltext.stop +135 -0
  14. data/data/fulltext/data/dicts/german/fulltext.aff +1233 -0
  15. data/data/fulltext/data/dicts/german/fulltext.dict +287574 -0
  16. data/data/fulltext/data/dicts/german/fulltext.stop +133 -0
  17. data/data/fulltext/data/german_compound/README +15 -0
  18. data/data/fulltext/data/german_compound/compound.pl +63 -0
  19. data/data/fulltext/data/german_compound/german.stop +20 -0
  20. data/data/fulltext/data/ispell-german-compound.tar.gz +0 -0
  21. data/data/fulltext/redist/dict_french/Makefile +12 -0
  22. data/data/fulltext/redist/dict_french/README.french +1 -0
  23. data/data/fulltext/redist/dict_french/dict_french.sql.in +7 -0
  24. data/data/fulltext/redist/dict_french/dict_snowball.c +56 -0
  25. data/data/fulltext/redist/dict_french/french_stem.c +1222 -0
  26. data/data/fulltext/redist/dict_french/french_stem.h +16 -0
  27. data/data/fulltext/redist/dict_french/subinclude.h +2 -0
  28. data/data/fulltext/redist/dict_german/Makefile +12 -0
  29. data/data/fulltext/redist/dict_german/README.german +1 -0
  30. data/data/fulltext/redist/dict_german/dict_german.sql.in +7 -0
  31. data/data/fulltext/redist/dict_german/dict_snowball.c +56 -0
  32. data/data/fulltext/redist/dict_german/german_stem.c +527 -0
  33. data/data/fulltext/redist/dict_german/german_stem.h +16 -0
  34. data/data/fulltext/redist/dict_german/subinclude.h +1 -0
  35. data/data/fulltext/redist/french_stem.c +1222 -0
  36. data/data/fulltext/redist/french_stem.h +16 -0
  37. data/data/fulltext/redist/german_stem.c +527 -0
  38. data/data/fulltext/redist/german_stem.h +16 -0
  39. data/jobs/export_chde_xls +20 -0
  40. data/jobs/export_csv +20 -0
  41. data/jobs/export_fachinfo_yaml +20 -0
  42. data/jobs/export_patinfo_yaml +20 -0
  43. data/jobs/export_yaml +20 -0
  44. data/jobs/import_dimdi +15 -0
  45. data/jobs/import_gkv +19 -0
  46. data/jobs/import_pharma24 +15 -0
  47. data/jobs/import_pharmnet +30 -0
  48. data/jobs/import_whocc +18 -0
  49. data/lib/fixes/singular.rb +9 -0
  50. data/lib/fixes/yaml.rb +13 -0
  51. data/lib/oddb.rb +13 -0
  52. data/lib/oddb/business/company.rb +18 -0
  53. data/lib/oddb/business/grant_download.rb +27 -0
  54. data/lib/oddb/business/invoice.rb +75 -0
  55. data/lib/oddb/config.rb +112 -0
  56. data/lib/oddb/currency.rb +6 -0
  57. data/lib/oddb/drugs.rb +16 -0
  58. data/lib/oddb/drugs/active_agent.rb +37 -0
  59. data/lib/oddb/drugs/atc.rb +53 -0
  60. data/lib/oddb/drugs/composition.rb +41 -0
  61. data/lib/oddb/drugs/ddd.rb +24 -0
  62. data/lib/oddb/drugs/dose.rb +107 -0
  63. data/lib/oddb/drugs/galenic_form.rb +21 -0
  64. data/lib/oddb/drugs/galenic_group.rb +17 -0
  65. data/lib/oddb/drugs/package.rb +111 -0
  66. data/lib/oddb/drugs/part.rb +55 -0
  67. data/lib/oddb/drugs/product.rb +25 -0
  68. data/lib/oddb/drugs/sequence.rb +68 -0
  69. data/lib/oddb/drugs/substance.rb +31 -0
  70. data/lib/oddb/drugs/substance_group.rb +13 -0
  71. data/lib/oddb/drugs/unit.rb +12 -0
  72. data/lib/oddb/export.rb +4 -0
  73. data/lib/oddb/export/csv.rb +94 -0
  74. data/lib/oddb/export/l10n_sessions.rb +30 -0
  75. data/lib/oddb/export/rss.rb +44 -0
  76. data/lib/oddb/export/server.rb +137 -0
  77. data/lib/oddb/export/xls.rb +127 -0
  78. data/lib/oddb/export/yaml.rb +212 -0
  79. data/lib/oddb/html/state/download.rb +13 -0
  80. data/lib/oddb/html/state/drugs/admin/package.rb +190 -0
  81. data/lib/oddb/html/state/drugs/admin/product.rb +56 -0
  82. data/lib/oddb/html/state/drugs/admin/sequence.rb +253 -0
  83. data/lib/oddb/html/state/drugs/ajax/explain_ddd_price.rb +19 -0
  84. data/lib/oddb/html/state/drugs/ajax/explain_price.rb +19 -0
  85. data/lib/oddb/html/state/drugs/ajax/global.rb +18 -0
  86. data/lib/oddb/html/state/drugs/ajax/package_infos.rb +19 -0
  87. data/lib/oddb/html/state/drugs/ajax/remote_infos.rb +19 -0
  88. data/lib/oddb/html/state/drugs/atc_browser.rb +39 -0
  89. data/lib/oddb/html/state/drugs/atc_guidelines.rb +21 -0
  90. data/lib/oddb/html/state/drugs/compare.rb +52 -0
  91. data/lib/oddb/html/state/drugs/download_export.rb +18 -0
  92. data/lib/oddb/html/state/drugs/downloads.rb +42 -0
  93. data/lib/oddb/html/state/drugs/fachinfo.rb +21 -0
  94. data/lib/oddb/html/state/drugs/feedback.rb +91 -0
  95. data/lib/oddb/html/state/drugs/global.rb +270 -0
  96. data/lib/oddb/html/state/drugs/init.rb +18 -0
  97. data/lib/oddb/html/state/drugs/login.rb +17 -0
  98. data/lib/oddb/html/state/drugs/package.rb +32 -0
  99. data/lib/oddb/html/state/drugs/patinfo.rb +21 -0
  100. data/lib/oddb/html/state/drugs/products.rb +51 -0
  101. data/lib/oddb/html/state/drugs/result.rb +125 -0
  102. data/lib/oddb/html/state/global.rb +206 -0
  103. data/lib/oddb/html/state/global_predefine.rb +17 -0
  104. data/lib/oddb/html/state/limit.rb +17 -0
  105. data/lib/oddb/html/state/login.rb +56 -0
  106. data/lib/oddb/html/state/paypal/checkout.rb +97 -0
  107. data/lib/oddb/html/state/paypal/collect.rb +19 -0
  108. data/lib/oddb/html/state/paypal/download.rb +61 -0
  109. data/lib/oddb/html/state/paypal/redirect.rb +18 -0
  110. data/lib/oddb/html/state/register_download.rb +24 -0
  111. data/lib/oddb/html/state/register_export.rb +38 -0
  112. data/lib/oddb/html/state/register_poweruser.rb +17 -0
  113. data/lib/oddb/html/state/viral/admin.rb +79 -0
  114. data/lib/oddb/html/state/viral/poweruser.rb +16 -0
  115. data/lib/oddb/html/util/annotated_list.rb +39 -0
  116. data/lib/oddb/html/util/know_it_all.rb +28 -0
  117. data/lib/oddb/html/util/known_user.rb +55 -0
  118. data/lib/oddb/html/util/lookandfeel.rb +698 -0
  119. data/lib/oddb/html/util/need_all_input.rb +29 -0
  120. data/lib/oddb/html/util/session.rb +84 -0
  121. data/lib/oddb/html/util/sort.rb +72 -0
  122. data/lib/oddb/html/util/unsaved_helper.rb +20 -0
  123. data/lib/oddb/html/util/validator.rb +59 -0
  124. data/lib/oddb/html/view/ajax/json.rb +22 -0
  125. data/lib/oddb/html/view/alpha_header.rb +28 -0
  126. data/lib/oddb/html/view/document.rb +117 -0
  127. data/lib/oddb/html/view/download.rb +33 -0
  128. data/lib/oddb/html/view/drugs/admin/package.rb +245 -0
  129. data/lib/oddb/html/view/drugs/admin/product.rb +104 -0
  130. data/lib/oddb/html/view/drugs/admin/sequence.rb +305 -0
  131. data/lib/oddb/html/view/drugs/ajax/explain_ddd_price.rb +87 -0
  132. data/lib/oddb/html/view/drugs/ajax/explain_price.rb +61 -0
  133. data/lib/oddb/html/view/drugs/ajax/package_infos.rb +105 -0
  134. data/lib/oddb/html/view/drugs/ajax/remote_infos.rb +44 -0
  135. data/lib/oddb/html/view/drugs/atc_browser.rb +68 -0
  136. data/lib/oddb/html/view/drugs/atc_guidelines.rb +94 -0
  137. data/lib/oddb/html/view/drugs/compare.rb +95 -0
  138. data/lib/oddb/html/view/drugs/download_export.rb +28 -0
  139. data/lib/oddb/html/view/drugs/downloads.rb +128 -0
  140. data/lib/oddb/html/view/drugs/fachinfo.rb +46 -0
  141. data/lib/oddb/html/view/drugs/feedback.rb +235 -0
  142. data/lib/oddb/html/view/drugs/init.rb +51 -0
  143. data/lib/oddb/html/view/drugs/legend.rb +24 -0
  144. data/lib/oddb/html/view/drugs/package.rb +403 -0
  145. data/lib/oddb/html/view/drugs/patinfo.rb +46 -0
  146. data/lib/oddb/html/view/drugs/products.rb +97 -0
  147. data/lib/oddb/html/view/drugs/result.rb +296 -0
  148. data/lib/oddb/html/view/drugs/search.rb +33 -0
  149. data/lib/oddb/html/view/drugs/template.rb +15 -0
  150. data/lib/oddb/html/view/foot.rb +52 -0
  151. data/lib/oddb/html/view/google.rb +23 -0
  152. data/lib/oddb/html/view/google_ads.rb +40 -0
  153. data/lib/oddb/html/view/head.rb +78 -0
  154. data/lib/oddb/html/view/limit.rb +109 -0
  155. data/lib/oddb/html/view/list.rb +59 -0
  156. data/lib/oddb/html/view/login.rb +38 -0
  157. data/lib/oddb/html/view/navigation.rb +67 -0
  158. data/lib/oddb/html/view/offset_header.rb +35 -0
  159. data/lib/oddb/html/view/paypal/collect.rb +95 -0
  160. data/lib/oddb/html/view/paypal/redirect.rb +51 -0
  161. data/lib/oddb/html/view/paypal/register_form.rb +149 -0
  162. data/lib/oddb/html/view/register_download.rb +29 -0
  163. data/lib/oddb/html/view/register_export.rb +29 -0
  164. data/lib/oddb/html/view/register_poweruser.rb +29 -0
  165. data/lib/oddb/html/view/rss/feedback.rb +64 -0
  166. data/lib/oddb/html/view/rss_preview.rb +61 -0
  167. data/lib/oddb/html/view/search.rb +104 -0
  168. data/lib/oddb/html/view/snapback.rb +24 -0
  169. data/lib/oddb/html/view/template.rb +56 -0
  170. data/lib/oddb/import/dimdi.rb +583 -0
  171. data/lib/oddb/import/excel.rb +45 -0
  172. data/lib/oddb/import/gkv.rb +463 -0
  173. data/lib/oddb/import/importer.rb +36 -0
  174. data/lib/oddb/import/pharma24.rb +211 -0
  175. data/lib/oddb/import/pharmnet.rb +1186 -0
  176. data/lib/oddb/import/rtf.rb +409 -0
  177. data/lib/oddb/import/whocc.rb +148 -0
  178. data/lib/oddb/import/xml.rb +15 -0
  179. data/lib/oddb/model.rb +179 -0
  180. data/lib/oddb/persistence.rb +22 -0
  181. data/lib/oddb/persistence/odba.rb +32 -0
  182. data/lib/oddb/persistence/odba/business/company.rb +13 -0
  183. data/lib/oddb/persistence/odba/business/grant_download.rb +14 -0
  184. data/lib/oddb/persistence/odba/business/invoice.rb +15 -0
  185. data/lib/oddb/persistence/odba/drugs/atc.rb +15 -0
  186. data/lib/oddb/persistence/odba/drugs/galenic_form.rb +18 -0
  187. data/lib/oddb/persistence/odba/drugs/galenic_group.rb +13 -0
  188. data/lib/oddb/persistence/odba/drugs/package.rb +25 -0
  189. data/lib/oddb/persistence/odba/drugs/product.rb +13 -0
  190. data/lib/oddb/persistence/odba/drugs/sequence.rb +21 -0
  191. data/lib/oddb/persistence/odba/drugs/substance.rb +21 -0
  192. data/lib/oddb/persistence/odba/drugs/substance_group.rb +13 -0
  193. data/lib/oddb/persistence/odba/drugs/unit.rb +13 -0
  194. data/lib/oddb/persistence/odba/export.rb +26 -0
  195. data/lib/oddb/persistence/odba/model.rb +68 -0
  196. data/lib/oddb/persistence/odba/text/document.rb +11 -0
  197. data/lib/oddb/persistence/odba/util/code.rb +11 -0
  198. data/lib/oddb/persistence/odba/util/m10l_document.rb +13 -0
  199. data/lib/oddb/persistence/og.rb +16 -0
  200. data/lib/oddb/persistence/og/drugs/composition.rb +14 -0
  201. data/lib/oddb/persistence/og/drugs/product.rb +14 -0
  202. data/lib/oddb/persistence/og/drugs/sequence.rb +15 -0
  203. data/lib/oddb/persistence/og/model.rb +25 -0
  204. data/lib/oddb/persistence/og/util/multilingual.rb +13 -0
  205. data/lib/oddb/redist/rtf_tools/reader.rb +139 -0
  206. data/lib/oddb/remote/business/company.rb +17 -0
  207. data/lib/oddb/remote/drugs/active_agent.rb +27 -0
  208. data/lib/oddb/remote/drugs/atc.rb +31 -0
  209. data/lib/oddb/remote/drugs/dose.rb +8 -0
  210. data/lib/oddb/remote/drugs/galenic_form.rb +24 -0
  211. data/lib/oddb/remote/drugs/package.rb +128 -0
  212. data/lib/oddb/remote/drugs/part.rb +30 -0
  213. data/lib/oddb/remote/drugs/substance.rb +20 -0
  214. data/lib/oddb/remote/drugs/unit.rb +20 -0
  215. data/lib/oddb/remote/object.rb +36 -0
  216. data/lib/oddb/text/chapter.rb +23 -0
  217. data/lib/oddb/text/document.rb +42 -0
  218. data/lib/oddb/text/format.rb +37 -0
  219. data/lib/oddb/text/paragraph.rb +53 -0
  220. data/lib/oddb/text/picture.rb +89 -0
  221. data/lib/oddb/text/table.rb +68 -0
  222. data/lib/oddb/util.rb +9 -0
  223. data/lib/oddb/util/annotated_list.rb +37 -0
  224. data/lib/oddb/util/code.rb +69 -0
  225. data/lib/oddb/util/comparison.rb +36 -0
  226. data/lib/oddb/util/download.rb +17 -0
  227. data/lib/oddb/util/exporter.rb +8 -0
  228. data/lib/oddb/util/feedback.rb +23 -0
  229. data/lib/oddb/util/ipn.rb +53 -0
  230. data/lib/oddb/util/job.rb +23 -0
  231. data/lib/oddb/util/logger.rb +20 -0
  232. data/lib/oddb/util/m10l_document.rb +41 -0
  233. data/lib/oddb/util/mail.rb +87 -0
  234. data/lib/oddb/util/money.rb +64 -0
  235. data/lib/oddb/util/multilingual.rb +70 -0
  236. data/lib/oddb/util/quanty.rb +3 -0
  237. data/lib/oddb/util/quanty/fact.rb +242 -0
  238. data/lib/oddb/util/quanty/main.rb +164 -0
  239. data/lib/oddb/util/quanty/parse.rb +872 -0
  240. data/lib/oddb/util/quanty/units.dump +0 -0
  241. data/lib/oddb/util/server.rb +150 -0
  242. data/lib/oddb/util/smtp_tls.rb +58 -0
  243. data/lib/oddb/util/updater.rb +161 -0
  244. data/lib/oddb/util/ydim.rb +110 -0
  245. data/lib/oddb/util/yus.rb +46 -0
  246. data/test/business/test_company.rb +29 -0
  247. data/test/business/test_grant_download.rb +29 -0
  248. data/test/drugs/test_active_agent.rb +53 -0
  249. data/test/drugs/test_atc.rb +54 -0
  250. data/test/drugs/test_composition.rb +88 -0
  251. data/test/drugs/test_ddd.rb +22 -0
  252. data/test/drugs/test_dose.rb +189 -0
  253. data/test/drugs/test_galenic_form.rb +41 -0
  254. data/test/drugs/test_package.rb +172 -0
  255. data/test/drugs/test_part.rb +32 -0
  256. data/test/drugs/test_product.rb +31 -0
  257. data/test/drugs/test_sequence.rb +140 -0
  258. data/test/drugs/test_substance.rb +51 -0
  259. data/test/drugs/test_substance_group.rb +27 -0
  260. data/test/export/test_rss.rb +86 -0
  261. data/test/export/test_server.rb +163 -0
  262. data/test/export/test_xls.rb +146 -0
  263. data/test/export/test_yaml.rb +120 -0
  264. data/test/import/data/csv/products.csv +11 -0
  265. data/test/import/data/html/dimdi_index.html +400 -0
  266. data/test/import/data/html/gkv/Befreiungsliste_Arzneimittel_Versicherte.gkvnet +508 -0
  267. data/test/import/data/html/pharma24/1337397.html +754 -0
  268. data/test/import/data/html/pharma24/842756.html +570 -0
  269. data/test/import/data/html/pharma24/ac-page-10.html +2999 -0
  270. data/test/import/data/html/pharma24/ac-page-11.html +2999 -0
  271. data/test/import/data/html/pharma24/ac-page-12.html +2999 -0
  272. data/test/import/data/html/pharma24/ac-page-13.html +2999 -0
  273. data/test/import/data/html/pharma24/ac-page-14.html +2999 -0
  274. data/test/import/data/html/pharma24/ac-page-15.html +3011 -0
  275. data/test/import/data/html/pharma24/ac-page-16.html +3050 -0
  276. data/test/import/data/html/pharma24/ac-page-17.html +3285 -0
  277. data/test/import/data/html/pharma24/ac-page-18.html +3109 -0
  278. data/test/import/data/html/pharma24/ac-page-19.html +3126 -0
  279. data/test/import/data/html/pharma24/ac-page-2.html +3005 -0
  280. data/test/import/data/html/pharma24/ac-page-20.html +3007 -0
  281. data/test/import/data/html/pharma24/ac-page-21.html +2999 -0
  282. data/test/import/data/html/pharma24/ac-page-22.html +2999 -0
  283. data/test/import/data/html/pharma24/ac-page-23.html +3055 -0
  284. data/test/import/data/html/pharma24/ac-page-24.html +2999 -0
  285. data/test/import/data/html/pharma24/ac-page-25.html +3004 -0
  286. data/test/import/data/html/pharma24/ac-page-26.html +2999 -0
  287. data/test/import/data/html/pharma24/ac-page-27.html +3167 -0
  288. data/test/import/data/html/pharma24/ac-page-28.html +3236 -0
  289. data/test/import/data/html/pharma24/ac-page-29.html +3110 -0
  290. data/test/import/data/html/pharma24/ac-page-3.html +2999 -0
  291. data/test/import/data/html/pharma24/ac-page-30.html +2999 -0
  292. data/test/import/data/html/pharma24/ac-page-31.html +2999 -0
  293. data/test/import/data/html/pharma24/ac-page-32.html +2999 -0
  294. data/test/import/data/html/pharma24/ac-page-33.html +3001 -0
  295. data/test/import/data/html/pharma24/ac-page-34.html +2999 -0
  296. data/test/import/data/html/pharma24/ac-page-35.html +2999 -0
  297. data/test/import/data/html/pharma24/ac-page-36.html +2999 -0
  298. data/test/import/data/html/pharma24/ac-page-37.html +2999 -0
  299. data/test/import/data/html/pharma24/ac-page-38.html +3003 -0
  300. data/test/import/data/html/pharma24/ac-page-39.html +2999 -0
  301. data/test/import/data/html/pharma24/ac-page-4.html +2999 -0
  302. data/test/import/data/html/pharma24/ac-page-40.html +2999 -0
  303. data/test/import/data/html/pharma24/ac-page-41.html +2999 -0
  304. data/test/import/data/html/pharma24/ac-page-42.html +2999 -0
  305. data/test/import/data/html/pharma24/ac-page-43.html +2999 -0
  306. data/test/import/data/html/pharma24/ac-page-44.html +2999 -0
  307. data/test/import/data/html/pharma24/ac-page-45.html +2999 -0
  308. data/test/import/data/html/pharma24/ac-page-46.html +2999 -0
  309. data/test/import/data/html/pharma24/ac-page-47.html +2999 -0
  310. data/test/import/data/html/pharma24/ac-page-48.html +2999 -0
  311. data/test/import/data/html/pharma24/ac-page-49.html +2999 -0
  312. data/test/import/data/html/pharma24/ac-page-5.html +3168 -0
  313. data/test/import/data/html/pharma24/ac-page-50.html +2999 -0
  314. data/test/import/data/html/pharma24/ac-page-51.html +2999 -0
  315. data/test/import/data/html/pharma24/ac-page-52.html +3003 -0
  316. data/test/import/data/html/pharma24/ac-page-53.html +2999 -0
  317. data/test/import/data/html/pharma24/ac-page-54.html +3095 -0
  318. data/test/import/data/html/pharma24/ac-page-55.html +3041 -0
  319. data/test/import/data/html/pharma24/ac-page-56.html +2999 -0
  320. data/test/import/data/html/pharma24/ac-page-57.html +3001 -0
  321. data/test/import/data/html/pharma24/ac-page-58.html +3001 -0
  322. data/test/import/data/html/pharma24/ac-page-59.html +2999 -0
  323. data/test/import/data/html/pharma24/ac-page-6.html +3072 -0
  324. data/test/import/data/html/pharma24/ac-page-60.html +3001 -0
  325. data/test/import/data/html/pharma24/ac-page-61.html +3005 -0
  326. data/test/import/data/html/pharma24/ac-page-62.html +2999 -0
  327. data/test/import/data/html/pharma24/ac-page-63.html +3007 -0
  328. data/test/import/data/html/pharma24/ac-page-64.html +3007 -0
  329. data/test/import/data/html/pharma24/ac-page-65.html +2999 -0
  330. data/test/import/data/html/pharma24/ac-page-66.html +3011 -0
  331. data/test/import/data/html/pharma24/ac-page-67.html +3026 -0
  332. data/test/import/data/html/pharma24/ac-page-68.html +2999 -0
  333. data/test/import/data/html/pharma24/ac-page-69.html +3010 -0
  334. data/test/import/data/html/pharma24/ac-page-7.html +2999 -0
  335. data/test/import/data/html/pharma24/ac-page-70.html +3192 -0
  336. data/test/import/data/html/pharma24/ac-page-71.html +3133 -0
  337. data/test/import/data/html/pharma24/ac-page-72.html +2999 -0
  338. data/test/import/data/html/pharma24/ac-page-73.html +3227 -0
  339. data/test/import/data/html/pharma24/ac-page-74.html +3241 -0
  340. data/test/import/data/html/pharma24/ac-page-75.html +3227 -0
  341. data/test/import/data/html/pharma24/ac-page-76.html +3244 -0
  342. data/test/import/data/html/pharma24/ac-page-77.html +1164 -0
  343. data/test/import/data/html/pharma24/ac-page-8.html +2999 -0
  344. data/test/import/data/html/pharma24/ac-page-9.html +2999 -0
  345. data/test/import/data/html/pharma24/ac.html +2999 -0
  346. data/test/import/data/html/pharmnet/display.html +662 -0
  347. data/test/import/data/html/pharmnet/display1.html +625 -0
  348. data/test/import/data/html/pharmnet/display2.html +625 -0
  349. data/test/import/data/html/pharmnet/display3.html +625 -0
  350. data/test/import/data/html/pharmnet/display_tramal.html +634 -0
  351. data/test/import/data/html/pharmnet/empty_result.html +395 -0
  352. data/test/import/data/html/pharmnet/gate.html +246 -0
  353. data/test/import/data/html/pharmnet/index.html +258 -0
  354. data/test/import/data/html/pharmnet/paged_result_1.html +401 -0
  355. data/test/import/data/html/pharmnet/paged_result_2.html +401 -0
  356. data/test/import/data/html/pharmnet/result.html +401 -0
  357. data/test/import/data/html/pharmnet/search.html +865 -0
  358. data/test/import/data/html/pharmnet/search_filtered.html +182 -0
  359. data/test/import/data/html/whocc/A.html +56 -0
  360. data/test/import/data/html/whocc/A03.html +48 -0
  361. data/test/import/data/html/whocc/A03AB.html +48 -0
  362. data/test/import/data/html/whocc/A06AA.html +47 -0
  363. data/test/import/data/html/whocc/C03.html +47 -0
  364. data/test/import/data/html/whocc/login.html +77 -0
  365. data/test/import/data/mail/csv.mail +81 -0
  366. data/test/import/data/rtf/pharmnet/aarane.pi.rtf +648 -0
  367. data/test/import/data/rtf/pharmnet/ace_hemmer_ratio.pi.rtf +324 -0
  368. data/test/import/data/rtf/pharmnet/ace_hemmer_ratio.rtf +4816 -0
  369. data/test/import/data/rtf/pharmnet/acemetacin.pi.rtf +388 -0
  370. data/test/import/data/rtf/pharmnet/acemit.pi.rtf +240 -0
  371. data/test/import/data/rtf/pharmnet/acerbon.pi.rtf +1257 -0
  372. data/test/import/data/rtf/pharmnet/acetylcystein.pi.rtf +323 -0
  373. data/test/import/data/rtf/pharmnet/aciclo.pi.rtf +287 -0
  374. data/test/import/data/rtf/pharmnet/aciclovir.pi.rtf +236 -0
  375. data/test/import/data/rtf/pharmnet/actrapid.pi.rtf +322 -0
  376. data/test/import/data/rtf/pharmnet/amlodipin.pi.rtf +452 -0
  377. data/test/import/data/rtf/pharmnet/amlodipin.rtf +473 -0
  378. data/test/import/data/rtf/pharmnet/aspirin.pi.rtf +313 -0
  379. data/test/import/data/rtf/pharmnet/aspirin.rtf +781 -0
  380. data/test/import/data/rtf/pharmnet/baymycard.pi.rtf +447 -0
  381. data/test/import/data/rtf/pharmnet/omeprazol.pi.rtf +510 -0
  382. data/test/import/data/rtf/pharmnet/omeprazol.rtf +9216 -0
  383. data/test/import/data/rtf/pharmnet/paroxetin.pi.rtf +678 -0
  384. data/test/import/data/rtf/pharmnet/selegilin.pi.rtf +312 -0
  385. data/test/import/data/rtf/pharmnet/selegilin.rtf +683 -0
  386. data/test/import/data/rtf/pharmnet/valium.pi.rtf +387 -0
  387. data/test/import/data/txt/gkv/gkv_p1.txt +17 -0
  388. data/test/import/data/xls/darform_010706.xls +0 -0
  389. data/test/import/data/xls/fb010706.xls +0 -0
  390. data/test/import/data/xls/liste_zuzahlungsbefreite_arzneimittel_suchfunktion.xls +0 -0
  391. data/test/import/data/xls/wirkkurz_010406.xls +0 -0
  392. data/test/import/data/xml/ATC_2006.xml +47 -0
  393. data/test/import/data/xml/ATC_2006_ddd.xml +35 -0
  394. data/test/import/test_dimdi.rb +323 -0
  395. data/test/import/test_excel.rb +31 -0
  396. data/test/import/test_gkv.rb +260 -0
  397. data/test/import/test_pharma24.rb +112 -0
  398. data/test/import/test_pharmnet.rb +980 -0
  399. data/test/import/test_rtf.rb +37 -0
  400. data/test/import/test_whocc.rb +314 -0
  401. data/test/remote/drugs/test_active_agent.rb +36 -0
  402. data/test/selenium/selenium-server.jar +0 -0
  403. data/test/selenium/test_atc_browser.rb +121 -0
  404. data/test/selenium/test_atc_guidelines.rb +95 -0
  405. data/test/selenium/test_collect.rb +137 -0
  406. data/test/selenium/test_compare.rb +294 -0
  407. data/test/selenium/test_fachinfo.rb +128 -0
  408. data/test/selenium/test_feedback.rb +192 -0
  409. data/test/selenium/test_init.rb +64 -0
  410. data/test/selenium/test_limit.rb +304 -0
  411. data/test/selenium/test_login.rb +67 -0
  412. data/test/selenium/test_package.rb +516 -0
  413. data/test/selenium/test_patinfo.rb +128 -0
  414. data/test/selenium/test_product.rb +80 -0
  415. data/test/selenium/test_products.rb +141 -0
  416. data/test/selenium/test_search.rb +933 -0
  417. data/test/selenium/test_sequence.rb +513 -0
  418. data/test/selenium/unit.rb +190 -0
  419. data/test/stub/http_server.rb +144 -0
  420. data/test/stub/model.rb +173 -0
  421. data/test/suite.rb +15 -0
  422. data/test/test_model.rb +83 -0
  423. data/test/util/test_code.rb +74 -0
  424. data/test/util/test_ipn.rb +117 -0
  425. data/test/util/test_mail.rb +85 -0
  426. data/test/util/test_multilingual.rb +97 -0
  427. data/test/util/test_server.rb +94 -0
  428. data/test/util/test_updater.rb +130 -0
  429. data/test/util/test_ydim.rb +115 -0
  430. data/test/util/test_yus.rb +79 -0
  431. metadata +568 -0
@@ -0,0 +1,36 @@
1
+ require 'encoding/character/utf-8'
2
+ require 'iconv'
3
+
4
module ODDB
  module Import
    # Common base for the importers: holds the report accumulator and
    # offers helpers for normalising scraped text (capitalisation, company
    # names, latin1 -> UTF-8 conversion).
    class Importer
      # Shared latin1 -> UTF-8 converter used by #utf8.
      @@iconv = Iconv.new('utf8//IGNORE//TRANSLIT', 'latin1')
      # Words that #capitalize_all writes in lower case.
      @@lower = /^(and|for|in(cl)?|on|plain|with)$/i
      attr_accessor :report
      def initialize
        @skip_rows = 1
        @report = []
      end
      # Capitalizes every word of +str+, except the words matched by
      # @@lower, which are downcased. The string is cut at regexp word
      # boundaries, so separators are kept as they are.
      def capitalize_all(str)
        ## benchmarked fastest against an append (<<) solution
        words = str.split(/\b/).map do |word|
          word =~ @@lower ? word.downcase : word.capitalize
        end
        words.join
      end
      # Normalises a company name: capitalizes it, puts a space after
      # every dot that is not followed by whitespace, surrounds '/' and
      # '&' with spaces and restores the spelling of GmbH, AG and KG.
      # The stripped result is passed through u().
      def company_name(cname)
        name = capitalize_all(cname.to_s)
        name.gsub!(/\.(?!\s)/, '. ')
        name.gsub!(/[\/&]/) { |separator| " #{separator} " }
        name.gsub!(/Gmbh/, 'GmbH')
        name.gsub!(/Ag\b/, 'AG')
        name.gsub!(/\bKg\b/, 'KG')
        name.strip!
        u(name)
      end
      # Hook without behaviour in this class.
      def postprocess
      end
      # Converts +str+ from latin1 to UTF-8 and passes it through u().
      # Returns nil when +str+ is nil or false.
      def utf8(str)
        return unless str
        u(@@iconv.iconv(str))
      end
    end
  end
end
@@ -0,0 +1,211 @@
1
+ #!/usr/bin/env ruby
2
+ # Import::Pharma24 -- de.oddb.org -- 21.04.2008 -- hwyss@ywesee.com
3
+
4
+ require 'oddb/import/importer'
5
+ require 'oddb/util/money'
6
+
7
module ODDB
  module Import
    # Importer that looks packages up on the pharma24 web shop (@host) and
    # updates name, prescription code, prices, package size and company
    # from the scraped result pages.
    class Pharma24 < Importer
      def initialize
        # run the base initializer as well, so that the state set up by
        # Importer#initialize is present on Pharma24 instances too
        super
        @count = 0
        @created_companies = 0
        @found = 0
        @host = 'http://www.apotheke-online-internet.de'
      end
      # Updates every package in +packages+ and returns the report lines.
      #
      # agent    - HTTP agent used for the requests (its max_history is
      #            set to 1)
      # packages - Array of packages; it is consumed in place: the
      #            entries are first replaced by their odba_id and then
      #            shifted off one by one
      # opts     - :all => true forces an update of every package
      def import(agent, packages, opts={:all => false})
        agent.max_history = 1
        packages.collect! { |package| package.odba_id }
        while id = packages.shift
          update_package(agent, ODBA.cache.fetch(id), opts)
        end
        report
      end
      # Finds the company named in data[:company] (normalised through
      # company_name) or builds a new, unsaved one and counts it.
      def import_company(data)
        name = company_name(data[:company])
        company = Business::Company.find_by_name(name)
        if(company.nil?)
          @created_companies += 1
          company = Business::Company.new
          company.name.de = name
        end
        company
      end
      # Writes multi, size, quantity and unit of the first part of
      # +package+ (a part is added when there is none) from data[:size],
      # data[:unit] and data[:unitname], then saves the part.
      def import_size(data, package)
        part = package.parts.first || package.add_part(Drugs::Part.new)
        # the size string is read right to left: "multi x size x dose"
        dose, size, multi = data[:size].to_s.split(/x/i, 3).reverse.compact
        unit = data[:unit].to_s
        if(unit != 'St')
          part.quantity = Drugs::Dose.new(dose, unit)
        elsif(multi.nil?)
          # unit 'St': the last number is the size, not a dose
          multi = size
          size = dose
        end
        multi = multi.to_i
        size = size.to_i
        part.multi = (multi > 0) ? multi : nil
        part.size = (size > 0) ? size : nil
        if(unitname = data[:unitname])
          unit = Drugs::Unit.find_by_name(unitname)
          unless(unit)
            unit = Drugs::Unit.new
            unit.name.de = unitname
            unit.save
          end
          part.unit = unit
        end
        part.save
      end
      # Returns the tables below +node+ that contain an 'h2/a' element.
      def interesting_tables node
        (node/'table').find_all do |inner_node| !(inner_node/'h2/a').empty? end.to_a
      end
      # Fetches "#@host/<fst><snd>.html" and follows the last
      # 'pageResults' link for as long as its text is the next-page
      # label. Returns the extracted data of all visited pages.
      def get_alphabetical agent, fst, snd
        url = "#@host/#{fst}#{snd}.html"
        page = agent.get url
        data = extract_data page
        # NOTE(review): the label literal below looks as if non-ASCII
        # characters were replaced by '?' - confirm against the site.
        while (link = (page/'//a[@class="pageResults"]').last) \
          && link.inner_text == '[n?chste?>>]'
          page = agent.get link.attributes['href']
          data.concat extract_data(page)
        end
        data
      end
      # Extracts one Hash per product table of +page+. Possible keys:
      # :name, :url, :price_public, :code_prescription, :size, :unit,
      # :unitname, :company. A key is only present when the matching
      # element was found, so callers must expect missing keys.
      def extract_data page
        data = []
        ## this should be page/'table[h2/a]'
        #  -> but Nokogiri apparently can't handle that
        all_tables = interesting_tables page
        # tables nested inside another interesting table are dropped
        duplicates = []
        all_tables.each do |table|
          duplicates.concat interesting_tables(table)
        end
        (all_tables - duplicates).each do |table|
          link, = table/'h2/a'
          if link
            prod = {
              :name => utf8(link.inner_text),
              :url  => link.attribute('href').to_s,
            }
            if price = (table/:strong).first
              prod.store :price_public, price.inner_text.tr(',', '.').to_f
            end
            ## should be (table/'td[text()="Abgabehinweis:"]').first
            #  -> but Nokogiri apparently can't handle that
            if prescription = td_with_text(table, "Abgabehinweis:")
              td, = prescription.xpath('following-sibling::td')
              # a label cell without a value cell carries no information
              if td
                prod.store :code_prescription,
                           !!/Rezeptpflichtig/.match(td.inner_text)
              end
            end
            ## should be (table/'td[text()="Packungsinhalt:"]').first
            #  -> but Nokogiri apparently can't handle that
            if content = td_with_text(table, "Packungsinhalt:")
              td, = content.xpath('following-sibling::td')
              size_str = td ? td.inner_text : ''
              if match = /\s*(.*)\s+(\S+)\s+(\S+)\s*$/.match(size_str)
                size = utf8 match[1]
                unit = utf8 match[2]
                name = utf8 match[3]
                # only two tokens: they are size and unit, no unit name
                if size.empty?
                  size, unit, name = unit, name, nil
                end
                prod.update :size => size, :unit => unit, :unitname => name
              end
            end
            if company = (table/'a[@class="liste"]').first
              prod.store :company, utf8(company.inner_text)
            end
            data.push prod
          end
        end
        data
      end
      # Returns the report lines built from the counters.
      def report
        [
          sprintf("Checked %5i Packages", @count),
          sprintf("Updated %5i Packages", @found),
          sprintf("Created %5i Companies", @created_companies),
        ]
      end
      # Runs the shop's keyword search for +term+ and returns the
      # extracted data. A Zlib::GzipFile::Error is retried up to three
      # times; any error that is re-raised gets the url appended to its
      # message.
      def search agent, term
        url = "#@host/advanced_search_result.php?keywords=#{term}"
        page = agent.get url
        extract_data page
      rescue Zlib::GzipFile::Error => err
        # retries survives the retry because the method body is re-run
        # in the same local scope
        retries ||= 3
        if retries > 0
          retries -= 1
          retry
        else
          err.message << " after 3 retries - url: #{url}"
          raise err
        end
      rescue StandardError => err
        err.message << " url: #{url}"
        raise err
      end
      # Returns the first td of +table+ whose stripped text equals
      # +text+, or nil.
      def td_with_text table, text
        (table/'td').find do |node|
          node.text.strip == text
        end
      end
      # Updates +package+ from the first search result for its 'DE' cid
      # code. A package is looked up when opts[:all] is set, when it has
      # no public price, or when its public price originates from
      # :pharma24 or :csv_product_infos.
      def update_package agent, package, opts={}
        price = package.price(:public)
        resale = [ :pharma24,
          :csv_product_infos ].include?(package.data_origin(:price_public))
        needs_update = opts[:all] || price.nil? || resale
        if needs_update && (cid = package.code(:cid, 'DE'))
          @count += 1
          data, = search agent, cid.value
          if data
            @found += 1
            package.name.de = u(data[:name])
            presc = data[:code_prescription]
            if(code = package.code(:prescription))
              if(code.value != presc)
                code.value = presc
              end
            else
              package.add_code Util::Code.new(:prescription, presc, 'DE')
            end
            amount = data[:price_public]
            # extract_data stores :price_public only when a price element
            # was found, so amount may be nil here
            if(amount && amount > 0)
              update_price package, :public, amount
              if presc
                update_price package, :exfactory, package._price_exfactory
              end
            end
            import_size data, package
            package.save
            if((product = package.product) && product.company.nil?)
              product.company = import_company(data)
              product.save
            end
          end
        end
      end
      # Sets the 'DE' price of +type+ on +package+ to +amount+ and marks
      # :pharma24 as its data origin, unless nothing changed or the
      # current data origin contains an '@'.
      def update_price package, type, amount
        dotype = :"price_#{type}"
        # if this price has been edited manually we won't overwrite
        unless((data_origin = package.data_origin(dotype)) \
               && data_origin.to_s.include?('@'))
          either = false
          if(price = package.price(type, 'DE'))
            if(price != amount)
              price.amount = amount
              either = true
            end
          else
            price = Util::Money.new(amount, type, 'DE')
            package.add_price(price)
            either = true
          end
          if either
            package.data_origins.store dotype, :pharma24
          end
        end
      end
    end
  end
end
@@ -0,0 +1,1186 @@
1
+ #!/usr/bin/env ruby
2
+ # Import::PharmNet -- de.oddb.org -- 15.10.2007 -- hwyss@ywesee.com
3
+
4
+ require 'fileutils'
5
+ require 'htmlentities'
6
+ require 'mechanize'
7
+ require 'oddb/import/importer'
8
+ require 'oddb/import/rtf'
9
+ require 'oddb/util/mail'
10
+ require 'pp'
11
+
12
+ module ODDB
13
+ module Import
14
+ module PharmNet
15
+ class EncodedParser < Mechanize::Page
16
+ @@iconv = Iconv.new('utf8', 'latin1')
17
+ def initialize(uri=nil, response=nil, body=nil, code=nil)
18
+ body = @@iconv.iconv(body.gsub(/iso-8859-1/i, 'utf-8'))
19
+ ## HtmlEntities seems to kill the parser, do it manually for now
20
+ #htmlentities = HTMLEntities.new
21
+ #body = htmlentities.decode(body)
22
+ body.gsub! '&aacute;', 'á'
23
+ body.gsub! '&agrave;', 'à'
24
+ body.gsub! '&auml;', 'ä'
25
+ body.gsub! '&eacute;', 'é'
26
+ body.gsub! '&egrave;', 'è'
27
+ body.gsub! '&euml;', 'ë'
28
+ body.gsub! '&iacute;', 'í'
29
+ body.gsub! '&igrave;', 'ì'
30
+ body.gsub! '&iuml;', 'ï'
31
+ body.gsub! '&oacute;', 'ó'
32
+ body.gsub! '&ograve;', 'ò'
33
+ body.gsub! '&ouml;', 'ö'
34
+ body.gsub! '&uacute;', 'ú'
35
+ body.gsub! '&ugrave;', 'ù'
36
+ body.gsub! '&uuml;', 'ü'
37
+ super(uri, response, body, code)
38
+ end
39
+ end
40
+ class RenewableAgent < SimpleDelegator
41
+ def initialize agent
42
+ super
43
+ renew!
44
+ end
45
+ def renew!
46
+ agent = __getobj__.class.new
47
+ proxies = ODDB.config.http_proxies
48
+ host, port = proxies.at rand(proxies.size)
49
+ if host
50
+ ODDB.logger.debug('PharmNet') {
51
+ "Using proxy server #{host}:#{port}"
52
+ }
53
+ agent.set_proxy host, port
54
+ end
55
+ agent.pluggable_parser.html = EncodedParser
56
+ __setobj__ agent
57
+ end
58
+ end
59
+ class TermedRtf < Rtf
60
+ def initialize(term)
61
+ @term = term
62
+ end
63
+ end
64
+ class FiParser < TermedRtf
65
+ def identify_chapter buffer
66
+ name = case buffer
67
+ when /^1[08]\.?\s*Stand/i
68
+ 'date'
69
+ when /^14\.?\s*Sonstige\s+Hinweise/i
70
+ 'other_advice'
71
+ when /^(2|11)\.?\s*(Verschreibung|Verkauf)/i
72
+ 'sale_limitation'
73
+ when /^1\.?\s*Bezeichnung/i
74
+ 'name'
75
+ when /^[23]\.?\s*(Qualitative|Zusammensetzung)/i
76
+ 'composition'
77
+ when /^3\.?\s*Darreichung/i
78
+ 'galenic_form'
79
+ when /^3\.1\.?\s*Stoff/i
80
+ 'substance_group'
81
+ when /^3\.2\.?\s*(Arzneilich|Bestandteile)/i
82
+ 'active_agents'
83
+ when /^4(\.1)?\.?\s*Anwendung/i
84
+ 'indications'
85
+ when /^(10|4\.2)\.?\s*Dosierung/i
86
+ 'dosage'
87
+ when /^11\.?\s*Art\s+und\s+Dauer/i
88
+ 'application'
89
+ when /^(5|4\.3)\.?\s*Gegenanzeigen/i
90
+ 'counterindications'
91
+ when /^(8|4\.4)\.?\s*(Besondere\s+)?Warnhinweise/i
92
+ 'precautions'
93
+ when /^(7|4\.5)\.?\s*Wechselwirkungen/i
94
+ 'interactions'
95
+ when /^4\.6\.?\s*(Anwendung|Schwangerschaft)/i
96
+ 'pregnancy'
97
+ when /^4\.7\.?\s*Auswirkung/i
98
+ 'driving_ability'
99
+ when /^(6|4\.8)\.?\s*Nebenwirkungen/i
100
+ 'unwanted_effects'
101
+ when /^(12|4\.9)\.?\s*(Notfall|Überdosierung)/i
102
+ 'overdose'
103
+ when /^4\.?\s*Klinisch/i
104
+ 'clinical'
105
+ when /^5\.1\.?\s*Pharmakodynamisch/i
106
+ 'pharmacodynamics'
107
+ when /^13\.2\.?\s*Toxikologisch/i
108
+ 'toxicology'
109
+ when /^(13\.3|5\.2)\.?\s*Pharmakokineti(sch|k)/i
110
+ 'pharmacokinetics'
111
+ when /^13\.4\.?\s*Bioverfügbarkeit/i
112
+ 'bioavailability'
113
+ when /^5\.3\.?\s*Präklinisch/i
114
+ 'preclinicals'
115
+ when /^(13|5)\.?\s*Pharmakologisch/i
116
+ 'pharmacology'
117
+ when /^(3\.3|6\.1)\.?\s*(Liste|Hilfsstoffe?|Sonstige\s+Bestandteile)/i
118
+ 'excipients'
119
+ when /^(9|6\.2)\.?\s*(Wichtigste\s+)?Inkompatibilitäten/i
120
+ 'incompatibilities'
121
+ when /^(15|6\.3)\.?\s*(Dauer|Haltbarkeit)/i
122
+ 'shelf_life'
123
+ when /^(16|6\.4)\.?\s*(Besondere|Lagerung|Aufbewahrung)/i
124
+ 'storage'
125
+ when /^6\.5\.?\s*(Art|Behältnis)/i,
126
+ /^17\.?\s*Darreichungsformen\s+und\sPackung/
127
+ 'packaging'
128
+ when /^6\.6\.?\s*(Besondere|Hinweis|Entsorgung)/i
129
+ 'disposal'
130
+ when /^6\.?\sPharmazeutisch/i
131
+ 'pharmaceutic'
132
+ when /^(19|7)\.?\s*(Name|Pharmazeutischer|Inhaber)/i
133
+ 'company'
134
+ when /^20\.?\s*(Name|Hersteller)/i
135
+ 'producer'
136
+ when /^8\.?\s*Zulassung/i
137
+ 'registration'
138
+ when /^9\.?\s*Datum/i
139
+ 'registration_date'
140
+ when /^zusätzliche Angaben/i
141
+ 'additional_information'
142
+ end
143
+ if(name && !@document.chapter(name))
144
+ @document.add_chapter Text::Chapter.new(name)
145
+ end
146
+ super
147
+ end
148
+ def _sanitize_text(value)
149
+ if @buffer.empty? && @buffer.is_a?(Text::Paragraph)
150
+ value.gsub! /^([BF][A-Z0-9]{1,2})?\s*/, ''
151
+ end
152
+ end
153
+ end
154
+ class PiParser < TermedRtf
155
+ def identify_chapter buffer
156
+ name = nil
157
+ if(/\b#@term\b/i.match buffer)
158
+ name = case buffer
159
+ when /wof(ü|Ü|ue)r\s+(wird|werden)\s+(es|sie)\s+(angewendet|eingenommen)/i,
160
+ /wird\s+angewendet$/i
161
+ 'indications'
162
+ when /^(3\.?\s*)?Wie\s+(ist|sind).+?(anzuwenden|einzunehmen)\?/i
163
+ 'application'
164
+ when /vor\s+der\s+(Anwendung|Einnahme)\s+von/i
165
+ 'precautions'
166
+ when /^([56]\.?\s*)?Wie\s+(ist|sind).+?aufzubewahren/i
167
+ 'storage'
168
+ when /^Bitte\s.+für\s+Kinder\s+nicht\s+erreichbar/i
169
+ 'personal'
170
+ when /^([45]\.?\s*)?Welche\s+Nebenwirkungen/i, /^Nebenwirkungen:?$/i
171
+ 'unwanted_effects'
172
+ when /Behandlungserfolg/i
173
+ nil ## prevent composition if this is a dodgy match
174
+ else
175
+ 'composition'
176
+ end
177
+ else
178
+ name = case buffer
179
+ when /^([45]\.?\s*)?Welche\s+Nebenwirkungen/i, /^Nebenwirkungen:?$/i
180
+ 'unwanted_effects'
181
+ when /^(4\.?\s*)?Verhalten\s+im\s+Notfall/i
182
+ 'emergency'
183
+ when /^(6\.?\s*)?(Weitere\s+)?(Informationen|Angaben)/i,
184
+ /^(6\.?\s*)?Gebrauchsanleitung/i,
185
+ /^Zusätzliche\s+Informationen/i
186
+ 'additional_information'
187
+ when /^Anwendungsgebiete/i
188
+ 'indications'
189
+ when /^Vorsichtsma(ss|ß)nahmen/i
190
+ 'precautions'
191
+ when /^Dosierung\s*($|und)/i, /^Dosierungsanleitung/
192
+ 'application'
193
+ when /Angaben\s+zur\s+Haltbarkeit/i
194
+ 'storage'
195
+ when /^Gegenanzeigen/i
196
+ 'counterindications'
197
+ when /^Darreichungsform/i
198
+ 'packaging'
199
+ when /^(Hersteller.+)?Pharmazeutischer\s+Unternehmer/i,
200
+ /^Pharmazeutischer\s+Hersteller/i
201
+ 'company'
202
+ when /^\s*Stand\b/, /wurde\s+zuletzt\s+überarbeitet/i
203
+ 'date'
204
+ when /^(Sehr\s+geehrte|Liebe)r?\s+Patient/i,
205
+ /^Bitte\s.+für\s+Kinder\s+nicht\s+erreichbar/i
206
+ 'personal'
207
+ end
208
+ end
209
+ composition = @document.chapter('composition')
210
+ if(name && (name == 'composition' || composition))
211
+ chapter = @document.chapter(name)
212
+ if(chapter.nil?)
213
+ @document.add_chapter Text::Chapter.new(name)
214
+ else
215
+ pars = chapter.paragraphs.select do |par| !par.to_s.strip.empty? end
216
+ if(pars.size == 1 && /^\d+/.match(pars.first))
217
+ ## some PI insert a document-overview after the composition, in which
218
+ # case we have an erroneous chapter, identified by only consisting of
219
+ # a heading. In that case:
220
+ composition.append chapter
221
+ @document.remove_chapter chapter
222
+ @document.add_chapter Text::Chapter.new(name)
223
+ end
224
+ end
225
+ end
226
+ super
227
+ end
228
+ def _sanitize_text(value)
229
+ ## some rtfs have unusable information prior to the actual PI
230
+ case value
231
+ when /^PCX\b/
232
+ init
233
+ when /Gebrauchsinformation/
234
+ init if /Recyclinglogo/.match(current_chapter.to_s)
235
+ end
236
+ if @buffer.empty? && @buffer.is_a?(Text::Paragraph)
237
+ value.gsub! /^([P][A-Z0-9]{1,2})?\b/, ''
238
+ value.lstrip!
239
+ end
240
+ end
241
+ end
242
+ class Importer < Importer
243
+ ERROR_EXPLANATIONS = {
244
+ "execution expired" => "the server stopped responding.",
245
+ "503 => Net::HTTPServiceUnavailable" => "the server is unavailable: http://en.wikipedia.org/wiki/HTTP_503#5xx_Server_Error",
246
+ "Invalid RTF-File: Text before rtf-version" => "the link pointed to a file that could not be parsed as RTF (probably a PDF)",
247
+ "Multiple assignment of Registration-Number" => <<-EOS,
248
+ there is already a Registration in the system with this Registration-Number.
249
+ The two Registrations should probably be merged manually.
250
+ EOS
251
+ }
252
+ attr_reader :errors
253
+ def initialize
254
+ @stop = /(Pharma(ceuticals|zeutische\s*Fabrik)?|Arzneim(ittel|\.)|GmbH|[u&]\.?\s*Co\.?|Kg|Ltd\.?|')\s*/i
255
+ @htmlentities = HTMLEntities.new
256
+ @result_cache = {}
257
+ @distance_cache = {}
258
+ @errors = {}
259
+ @assigned = Hash.new 0
260
+ @removed = Hash.new 0
261
+ @not_removed = Hash.new 0
262
+ @repaired = 0
263
+ @reparsed_fis = 0
264
+ @reparsed_pis = 0
265
+ @products_created = 0
266
+ @sequences_created = 0
267
+ @packages_created = 0
268
+ @archive = File.join ODDB.config.var, 'rtf', 'pharmnet'
269
+ @sources = {}
270
+ FileUtils.mkdir_p @archive
271
+ @latest = File.join ODDB.config.var, 'html', 'pharmnet', 'latest.html'
272
+ FileUtils.mkdir_p File.dirname(@latest)
273
+ super
274
+ end
275
+ def assign_info(key, agent, data, sequence, opts)
276
+ return(remove_info key, sequence, opts) unless(url = data[key])
277
+
278
+ sequence.send "#{key}_url=", "http://gripsdb.dimdi.de#{url}"
279
+ term = data[:search_term]
280
+ doc = import_rtf key, agent, url, term, opts
281
+ doc.date = data[:"date_#{key}"]
282
+ # arbitrary cutoff: fachinfos with less than 5 chapters can't be right...
283
+ if doc.chapters.size > 5
284
+ _assign_info key, doc, sequence, opts
285
+ else
286
+ ODDB.logger.debug('PharmNet') {
287
+ sprintf("Discarding %s for %s (%s)", key, sequence_name(sequence), term)
288
+ }
289
+ remove_info key, sequence, opts
290
+ end
291
+ rescue Timeout::Error, StandardError => error
292
+ sequence.save
293
+ ODDB.logger.error('PharmNet') {
294
+ sprintf("%s: %s", error.class, error.message) << "\n" << error.backtrace.join("\n")
295
+ }
296
+ (@errors[error.message[0,42]] ||= []).push [ sequence ? sequence_name(sequence) : '',
297
+ error.message, error.backtrace.find { |ln| /pharmnet/.match ln }.to_s.strip, url ]
298
+ end
299
+ def _assign_info(key, doc, sequence, opts={})
300
+ info = sequence.send(key)
301
+ return unless info.empty? || opts[:replace]
302
+
303
+ ODDB.logger.debug('PharmNet') {
304
+ sprintf("Assigning %s to %s", key, sequence_name(sequence))
305
+ }
306
+ info.de = doc
307
+ @assigned[key] += 1
308
+ doc.save
309
+ info.save
310
+ sequence.save
311
+ end
312
+ def assign_registration(sequence, registration)
313
+ if(registration && sequence.code(:registration, 'EU') != registration)
314
+ ODDB.logger.debug('PharmNet') {
315
+ sprintf('Assigning Registration-Number %s to %s',
316
+ registration, sequence_name(sequence))
317
+ }
318
+ if unique_registration? registration
319
+ conflict = Drugs::Sequence.find_by_code(:value => registration,
320
+ :type => 'registration',
321
+ :country => 'EU')
322
+ if(conflict && conflict != sequence)
323
+ raise sprintf("Multiple assignment of Registration-Number %s (%s-%i/%s-%i)",
324
+ registration, sequence_name(sequence), sequence.odba_id,
325
+ conflict.name.de, conflict.odba_id)
326
+ end
327
+ end
328
+ if(code = sequence.code(:registration, 'EU'))
329
+ code.value = registration
330
+ else
331
+ sequence.add_code Util::Code.new(:registration, registration, 'EU')
332
+ end
333
+ sequence.save
334
+ end
335
+ end
336
# Picks the search result that best matches +sequence+.
#
# The candidates are first narrowed down by #suitable_data (name, galenic
# form and company comparison), then rated with #composition_relevance.
# Among all candidates sharing the highest relevance, the one with the most
# recent Fachinfo date (falling back to the Patinfo date) is returned.
#
# sequence - the sequence being matched; must respond to name, product,
#            galenic_forms, company and active_agents.
# result   - Array of result hashes as produced by the search.
#
# Returns the chosen data hash, or nil if no candidate has a relevance
# greater than zero.
def best_data(sequence, result)
  sname = sequence.name
  sname = sequence.product.name unless sname.de
  galform = sequence.galenic_forms.first
  company = sequence.company
  comparison = [
    sname,
    galform && galform.description,
    company && company.name,
  ].collect { |ml| ml ? ml.de : '' }
  suitable = suitable_data comparison, result,
                           :subcount => sequence.active_agents.size
  # Keep the relevance of every candidate. The previous implementation
  # stored the result of a conditional assignment, which turned every
  # candidate after the first maximum into nil and silently dropped ties.
  relevances = suitable.collect { |data|
    composition_relevance(sequence.active_agents, data)
  }
  max = relevances.max
  return nil unless max && max > 0

  contenders = []
  relevances.each_with_index { |rel, idx|
    contenders.push suitable.at(idx) if rel == max
  }
  # Undated candidates sort before dated ones so that a missing date can
  # never be compared with a Date.
  contenders.sort_by { |data|
    date = data[:date_fachinfo] || data[:date_patinfo]
    [date ? 1 : 0, date || 0]
  }.last
end
361
+ def _composition_paired_relevance(agent, detail)
362
+ adose = agent.dose.to_f
363
+ ddose = detail[:dose].to_f
364
+ drel = if(adose == 0 || adose == ddose)
365
+ 1
366
+ else
367
+ if(adose < ddose)
368
+ ddose, adose = adose, ddose
369
+ end
370
+ ddose / adose
371
+ end rescue 0
372
+ ignore = /hydrochlorid/
373
+ subname = agent.substance.name.de.gsub(ignore, '')
374
+ detname = detail[:substance].gsub(ignore, '')
375
+ srel = ngram_similarity(subname, detname)
376
+ drel + srel
377
+ end
378
# Rates how well the active agents of a sequence match the composition
# found in a search result.
#
# Every agent/detail combination is rated with
# #_composition_paired_relevance; the best one-to-one assignment found
# among the candidates returned by #exclusive_permutation is stored in
# data[:pairs] and its average rating in data[:relevance].
#
# agents - Array of active agents of the sequence.
# data   - result Hash; data[:composition] is the Array of detail hashes
#          (treated as empty when missing).
#
# Returns the stored relevance (0 if there is nothing to compare).
def composition_relevance(agents, data)
  # extract_details may return without a :composition entry
  details = data[:composition] || []
  participants = [agents.size, details.size].max
  # nothing on either side: avoid dividing by zero below
  return data.store(:relevance, 0) if participants.zero?

  relevances = {}
  agents.each_with_index { |agent, aidx|
    details.each_with_index { |detail, didx|
      relevances.store [aidx, didx],
                       _composition_paired_relevance(agent, detail)
    }
  }
  max = 0
  exclusive_permutation(participants).each { |pairs|
    # pairs without a counterpart on one side have no rating and count as 0
    sum = pairs.inject(0) { |memo, pair|
      memo + relevances[pair].to_f
    }
    if sum > max
      data.store :pairs, pairs
      max = sum
    end
  }
  data.store :relevance, max / participants
end
400
+ def create_sequence(term, data, company, product, galform)
401
+ pname, gfname, cname = data[:data]
402
+ official = pname[/^[^\d(]+/].strip
403
+ company_name = company.name.de.gsub(@stop, '').strip
404
+ official_with_company = [ official, company_name ].join(' ')
405
+ @sequences_created += 1
406
+ sequence = Drugs::Sequence.new
407
+ composition = Drugs::Composition.new
408
+ composition.sequence = sequence
409
+ composition.galenic_form = galform
410
+ data[:composition].each do |act|
411
+ substance = import_substance act[:substance]
412
+ agent = Drugs::ActiveAgent.new substance, act[:dose]
413
+ agent.composition = composition
414
+ agent.save
415
+ end
416
+ composition.save
417
+ sequence.name.de = official_with_company
418
+ sequence.marketable = data[:marketable]
419
+ sequence.product = product
420
+ sequence.save
421
+ sequence
422
+ end
423
+ def exclusive_permutation(participants)
424
+ left = (0...participants).to_a
425
+ right = left.dup
426
+ _exclusive_permutation(left, right)
427
+ end
428
# Recursively builds every one-to-one assignment between the elements of
# +left+ and +right+.
#
# The first element of +left+ is paired with each element of +right+ in
# turn and the remaining elements are assigned recursively. Each assignment
# is therefore produced exactly once (n! results). The previous version
# also iterated over every element of +left+ and so returned each
# assignment n! times in different pair orders.
#
# left  - Array of distinct indices.
# right - Array of distinct indices.
#
# Returns an Array of assignments, each an Array of [left, right] pairs;
# an empty Array if +left+ is empty.
def _exclusive_permutation(left, right)
  return [] if left.empty?

  first, *pass_left = left
  return [[[first, right.first]]] if pass_left.empty?

  result = []
  right.each { |second|
    pass_right = right.reject { |val| val == second }
    _exclusive_permutation(pass_left, pass_right).each { |rest|
      result.push [[first, second]].concat(rest)
    }
  }
  result
end
445
+ def extract_details(page)
446
+ data = {}
447
+ _extract_newest_link(data, :fachinfo, "Fachinformation", page)
448
+ _extract_newest_link(data, :patinfo, "Gebrauchsinformation", page)
449
+ table = (page/"table[@border='1']").first or return data
450
+ rows = (table/"tr")[1..-1] || []
451
+ composition = rows.collect { |row|
452
+ spans = row/"span"
453
+ {
454
+ :ask_nr => _extract_details(spans[0]),
455
+ :substance => _extract_details(spans[1]),
456
+ :dose => parse_dose(_extract_details(spans[2])),
457
+ }
458
+ }
459
+ data.store :composition, composition
460
+ previous = ''
461
+ (page/"span[@class='wbtxt']").each { |span|
462
+ case previous
463
+ when /Reg\.?-Nr\.?/
464
+ data.store :registration, span.inner_text
465
+ when /Verkehrsf/
466
+ data.store :marketable, span.inner_text.include?('ja')
467
+ end
468
+ previous = span.inner_text
469
+ }
470
+ data
471
+ end
472
+ def _extract_details(span)
473
+ @htmlentities.decode(span.inner_html).gsub(/[\t\n]|\302\240/, '')
474
+ end
475
# Finds the most recently dated link whose text contains +search+ and
# records it in +data+.
#
# data   - Hash to update; receives :"date_<key>" (a Date) and key (href).
# key    - Symbol naming the document kind, e.g. :fachinfo or :patinfo.
# search - String that must appear in the link text (case-insensitive,
#          followed by a word boundary).
# page   - object responding to #links; each link responds to #text and
#          #href.
#
# Links without a parseable dd.mm.yyyy date are ignored instead of raising.
#
# Returns the updated +data+ hash, or nil if no dated link matched.
def _extract_newest_link(data, key, search, page)
  dated = page.links.inject([]) { |memo, link|
    text = link.text.to_s
    if /#{search}\b/i.match(text) && (str = text[/(\d{2}\.){2}\d{4}/])
      day, month, year = str.split('.').collect { |num| num.to_i }
      if Date.valid_date?(year, month, day)
        memo.push [Date.new(year, month, day), link.href]
      end
    end
    memo
  }
  # sort by date, then href; to_s keeps a nil href from breaking the sort
  newest = dated.sort_by { |date, href| [date, href.to_s] }.last
  if newest
    data.update :"date_#{key}" => newest.first, key => newest.last
  end
end
488
+ def extract_result(agent, page)
489
+ form = page.form("titlesForm")
490
+ node = form.form_node
491
+ result = _extract_result node
492
+ hrefs = (node/"a").select { |link|
493
+ /^\d*1(-\d+)?$/.match link.inner_text
494
+ }.collect { |link|
495
+ link["href"]
496
+ }.sort.uniq[1..-1]
497
+ if(hrefs)
498
+ hrefs.each_with_index { |href, idx|
499
+ page = agent.get href
500
+ result.concat _extract_result(page.form("titlesForm").form_node)
501
+ }
502
+ end
503
+ result
504
+ end
505
+ def _extract_result node
506
+ rows = (node/"tr")[2..-4] || []
507
+ rows.collect { |row|
508
+ {
509
+ :data => (row/"td//span[@title]").collect { |span| span["title"] },
510
+ :href => (row/"a[@name]").first["href"],
511
+ }
512
+ }
513
+ end
514
# Fills in missing doses of active agents from the composition found in a
# search result, using the agent/detail assignment in data[:pairs].
#
# Only agents without a dose (nil or quantity 0) are touched:
# * if the agent's substance equals the detail's substance, the dose is
#   copied over;
# * otherwise, unless the agent already has a chemical equivalence, the
#   current substance/dose is kept as chemical equivalence and the agent
#   is switched to the substance and dose from the detail.
# Each repaired agent increments @repaired.
#
# agents - Array of active agents of the sequence.
# data   - result Hash with :composition (Array of detail hashes) and
#          :pairs (Array of [agent_index, detail_index]).
#
# Pairs pointing past the end of either list are skipped, and a missing
# :pairs or :composition entry is treated as empty.
def fix_composition(agents, data)
  details = data[:composition] || []
  (data[:pairs] || []).each { |aidx, didx|
    agent = agents[aidx]
    detail = details[didx]
    # the lists may differ in length, leaving a pair without counterpart
    next unless agent && detail
    next unless agent.dose.nil? || agent.dose.qty == 0

    if(agent.substance == detail[:substance])
      agent.dose = detail[:dose]
      agent.save
      @repaired += 1
    elsif(!agent.chemical_equivalence)
      agent.chemical_equivalence = Drugs::ActiveAgent.new agent.substance, agent.dose
      agent.chemical_equivalence.save
      agent.substance = import_substance detail[:substance]
      agent.dose = detail[:dose]
      agent.save
      @repaired += 1
    end
  }
end
536
+ def get_details(agent, page, result)
537
+ form = page.form("titlesForm")
538
+ form.field("parinfo").value = 'true'
539
+ form.field("docBaseName").value = form.field('baseName').value
540
+ form.field("magicrequestid").value = rand.to_s
541
+ uri = URI.parse result[:href]
542
+ form.action = uri.path
543
+ uri.query.split('&').each { |param|
544
+ key, value = param.split('=', 2)
545
+ if field = form.field(key)
546
+ field.value = value
547
+ end
548
+ }
549
+ page = form.submit
550
+ end
551
+ def get_search_form(agent)
552
+ index = "http://www.pharmnet-bund.de/dynamic/de/am-info-system/index.html"
553
+ page = agent.get index
554
+ form = page.form("pharmnet_amis_off_ppv")
555
+ page = form.submit
556
+ link = page.links.find { |l| /(?<!nicht )akzeptieren/i.match l.text }
557
+ page = link.click
558
+ form = page.form("search_form")
559
+ link = page.links.find { |l| l.attributes["id"] == 'goME' }
560
+ form.action = link.href
561
+ form
562
+ end
563
+ def get_search_result(agent, term, sequence=nil, opts={})
564
+ opts = { :info_unrestricted => false,
565
+ :repair => false, :retries => 3,
566
+ :retry_unit => 60 }.merge opts
567
+ good = nil
568
+ term = term.dup
569
+ ODDB.logger.debug('PharmNet') { sprintf('Searching for %s', term) }
570
+ result = []
571
+ while result.empty?
572
+ return if term.length < 3
573
+ good = term.dup
574
+ result.concat search(agent, term, sequence, opts)
575
+ if(result.empty?)
576
+ good = term.gsub(/\s+/, '-')
577
+ result.concat search(agent, good, sequence, opts)
578
+ end
579
+ term.gsub! /\s*[^\s]+$/, ''
580
+ end
581
+ result.each { |data| data.store(:search_term, good) }
582
+ result
583
+ rescue Timeout::Error, StandardError => error
584
+ ODDB.logger.error('PharmNet') {
585
+ sprintf("%s: %s", error.class, error.message) << "\n" << error.backtrace.join("\n")
586
+ }
587
+ retries ||= opts[:retries]
588
+ if((error.is_a?(Timeout::Error) || /ServerError/.match(error.message)) \
589
+ && retries > 0)
590
+ seconds = opts[:retry_unit] * 4 ** (opts[:retries] - retries)
591
+ ODDB.logger.debug('PharmNet') {
592
+ sprintf("Waiting %i seconds for the server to recover...", seconds)
593
+ }
594
+ sleep seconds
595
+ retries -= 1
596
+ ODDB.logger.debug('PharmNet') {
597
+ "Renewing Mechanize-agent and starting a new Session" }
598
+ agent.renew!
599
+ @search_form = nil
600
+ retry
601
+ else
602
+ (@errors[error.message[0,42]] ||= []).push [ sequence ? sequence_name(sequence) : '',
603
+ error.message, error.backtrace.find { |ln| /pharmnet/.match ln }.to_s.strip ]
604
+ end
605
+ nil
606
+ end
607
+ def identify_details(agent, term, sequence=nil,
608
+ opts = { :info_unrestricted => false,
609
+ :repair => false, :retries => 3})
610
+ if result = get_search_result(agent, term, sequence, opts)
611
+ if result.size == 1
612
+ result.first
613
+ else
614
+ best_data sequence, result
615
+ end
616
+ end
617
+ end
618
# Finds the product of +company+ that a search result belongs to, creating
# a new product if none can be found.
#
# Candidate names are tried in this order: official name plus company,
# official name, search term plus company, search term. For each name an
# exact lookup is tried first, then a name search; only products owned by
# +company+ are accepted. The official name is the leading part of the
# result's product name up to the first digit or opening parenthesis; the
# company name has the @stop words removed.
#
# term    - the search term that produced the result.
# data    - result Hash; data[:data] starts with the product name.
# company - the company the product must belong to.
#
# Returns the product. When a product is created, @products_created is
# incremented and the saved product itself is returned, like the other
# import_* helpers do, rather than whatever #save returns.
def identify_product(term, data, company)
  pname, = data[:data]
  official = pname[/^[^\d(]+/].strip
  company_name = company.name.de.gsub(@stop, '').strip
  official_with_company = [ official, company_name ].join(' ')
  term_with_company = [ term, company_name ].join(' ')
  [official_with_company, official, term_with_company, term].each do |cnd|
    exact = Drugs::Product.find_by_name(cnd)
    return exact if exact && exact.company == company

    found = Drugs::Product.search_by_name(cnd).find do |candidate|
      candidate.company == company
    end
    return found if found
  end
  ## if we can't find a product, we'll have to create a new one.
  @products_created += 1
  product = Drugs::Product.new
  product.name.de = term_with_company
  product.company = company
  product.save
  product
end
643
+ def identify_sequence(data, product, galform)
644
+ if product
645
+ doses = data[:composition].collect do |act| act[:dose] end.compact.sort
646
+ product.sequences.find do |seq|
647
+ seq.compositions.size == 1 \
648
+ && seq.doses.compact.sort == doses \
649
+ && seq.galenic_forms == [galform]
650
+ end
651
+ end
652
+ end
653
+ def import(agent, sequences, opts = { :replace => false,
654
+ :reload => false,
655
+ :remove => false,
656
+ :repair => false,
657
+ :reparse => false,
658
+ :reparse_patinfo => false,
659
+ :retries => 3,
660
+ :retry_unit => 60 })
661
+ Util::Mail.notify_admins sprintf("%s: %s", Time.now.strftime('%c'),
662
+ self.class), _import(agent, sequences, opts)
663
+ end
664
# Runs the import over +sequences+ and returns the report.
#
# Sequences without a name (as given by #sequence_name) are dropped. With
# opts[:resume], only sequences whose downcased name sorts at or after the
# downcased resume value are kept. The rest is processed in name order via
# #process, and @checked is updated as progress is made.
#
# agent     - a Mechanize-like agent; wrapped in a RenewableAgent.
# sequences - Enumerable of sequences to check.
# opts      - options Hash, passed on to #process; :resume is read here.
#
# An empty selection is handled gracefully (nothing is processed).
#
# Returns the result of #report.
def _import(agent, sequences, opts = { :replace         => false,
                                       :reload          => false,
                                       :remove          => false,
                                       :repair          => false,
                                       :reparse         => false,
                                       :reparse_patinfo => false,
                                       :retries         => 3,
                                       :retry_unit      => 60 })
  agent = RenewableAgent.new agent
  if resume = opts[:resume]
    resume = resume.to_s.downcase
    sequences = sequences.select { |sequence|
      (name = sequence_name(sequence)) && name.downcase >= resume
    }
  else
    sequences = sequences.select { |sequence|
      sequence_name(sequence)
    }
  end
  sequences = sequences.sort_by { |sequence|
    sequence_name(sequence)
  }
  count = 0
  # there may be nothing left to do after filtering
  first = sequences.first
  head = first && first.name
  @checked = "Checked 0 Sequences"
  ## let odba cache release unneeded sequences ...
  sequences.collect! { |sequence| sequence.odba_id }
  while odba_id = sequences.shift
    begin
      ## ... and refetch them when necessary
      sequence = ODBA.cache.fetch(odba_id)
      count += 1
      @checked = sprintf "Checked %i Sequences from '%s' to '%s'",
                         count, head, sequence_name(sequence)
      process(agent, sequence, opts)
    rescue ODBA::OdbaError
      # deliberately ignored: a sequence that cannot be fetched is skipped
    end
  end
  report
end
704
+ def import_company(name)
705
+ term = clean = name.gsub(@stop, '').strip
706
+ company = Business::Company.find_by_name(term)
707
+ while company.nil? && !term.empty?
708
+ company = Business::Company.search_by_name(term).find do |gf|
709
+ relevance = ngram_similarity clean, gf.name.de.gsub(@stop, '')
710
+ relevance > 0.8
711
+ end
712
+ term = term.gsub /(^|\s)+\S+\s*$/, ''
713
+ end
714
+ if company
715
+ company.name.add_synonym name
716
+ else
717
+ company = Business::Company.new
718
+ company.name.de = name
719
+ end
720
+ company.save
721
+ company
722
+ end
723
+ def import_galenic_form(description)
724
+ galform = Drugs::GalenicForm.find_by_description(description)
725
+ unless galform
726
+ galform = Drugs::GalenicForm.search_by_description(description).find do |gf|
727
+ sim = ngram_similarity description, gf.description.de
728
+ sim > 0.75
729
+ end
730
+ if galform
731
+ galform.description.add_synonym description
732
+ galform.save
733
+ end
734
+ end
735
+ unless galform
736
+ galform = Drugs::GalenicForm.new
737
+ galform.description.de = description
738
+ galform.save
739
+ end
740
+ galform
741
+ end
742
+ def import_missing(agent, term, opts={})
743
+ @checked = "Searched for FIs/GIs for '#{term}'"
744
+ opts = { :skip_totals => true }.merge opts
745
+ agent = RenewableAgent.new agent
746
+ if result = get_search_result(agent, term, nil, opts)
747
+ result.each do |data|
748
+ company, product, galform = nil
749
+ sequence = nil
750
+ registration = data[:registration]
751
+ if registration && unique_registration?(registration)
752
+ sequence = Drugs::Sequence.find_by_code :value => registration
753
+ end
754
+ unless sequence
755
+ pname, gfname, cname = data[:data]
756
+ galform = import_galenic_form gfname
757
+ company = import_company cname
758
+ product = identify_product term, data, company
759
+ sequence = identify_sequence data, product, galform
760
+ end
761
+ if sequence
762
+ if opts[:repair]
763
+ pname, gfname, cname = data[:data]
764
+ if product = sequence.product
765
+ product.company ||= import_company cname
766
+ end
767
+ company_name = product.company.name.de.gsub(@stop, '').strip
768
+ official = pname[/^[^\d(]+/].strip
769
+ sequence.marketable = data[:marketable]
770
+ sequence.name.de = [ official, company_name ].join(' ')
771
+ agents = sequence.active_agents
772
+ relevance = composition_relevance agents, data
773
+ fix_composition agents, data
774
+ end
775
+ else
776
+ sequence = create_sequence term, data, company, product, galform
777
+ end
778
+ assign_registration sequence, data[:registration]
779
+ assign_info(:fachinfo, agent, data, sequence, opts)
780
+ assign_info(:patinfo, agent, data, sequence, opts)
781
+ import_package sequence, data, opts
782
+ end
783
+ end
784
+ report opts
785
+ end
786
+ def import_package(sequence, data, opts={})
787
+ pname, gfname, _ = data[:data]
788
+ if match = /^(?<name>.*?)\s*-\s*OP((?<size>\d+)|\((?<multi>\d+)x(?<size>\d+)\))(\((?<unit>[^)]+)\))?$/i.match(pname)
789
+ size = match[:size].to_i
790
+ multi = match[:multi] && match[:multi].to_i
791
+ package = sequence.packages.find do |pac|
792
+ pac.size == size
793
+ end
794
+ if package.nil?
795
+ @packages_created += 1
796
+ package = Drugs::Package.new
797
+ package.add_code Util::Code.new(:cid, "oddb#{package.uid}", 'DE')
798
+ package.name.de = match[:name]
799
+ part = Drugs::Part.new
800
+ part.size = size
801
+ part.unit = import_unit gfname
802
+ part.package = package
803
+ part.composition = sequence.compositions.first
804
+ part.save
805
+ package.sequence = sequence
806
+ package.save
807
+ end
808
+ package
809
+ end
810
+ end
811
# Returns the parsed document for the RTF file at +url+, downloading and
# parsing it when necessary.
#
# A document already stored for +url+ is returned as is, unless
# opts[:reparse] is set and the url has not yet been handled by this
# importer instance. The RTF is read from the local archive when present
# (unless opts[:reload] is set), otherwise downloaded with +agent+ and
# saved to the archive. Parsing is repeated with a progressively shortened
# search term until at least 4 chapters are found or the term is used up.
#
# key   - :fachinfo or :patinfo; selects FiParser or PiParser.
# agent - Mechanize-like agent used for the download.
# url   - source URL (or path) of the RTF file.
# term  - search term handed to the parser.
# opts  - Hash with :reparse and :reload.
#
# Returns the document; an existing one gets its chapters replaced and is
# saved. Returns the existing document (possibly nil) unchanged if +term+
# is empty and nothing could be parsed.
def import_rtf(key, agent, url, term, opts = { :reparse => false,
                                               :reload  => false})
  pklass = case key
           when :fachinfo
             FiParser
           when :patinfo
             PiParser
           end
  path = File.join @archive, File.basename(url)
  doc = Text::Document.find_by_source(url)
  ODDB.logger.debug('PharmNet') {
    sprintf('Comparing %s-sources for %s', key, term) }
  if(doc.nil? || (opts[:reparse] && !@sources[url]))
    @sources.store url, true
    io = nil
    chapters = []
    new = nil
    begin
      if(opts[:reload] || !File.exist?(path))
        uri = URI.parse url
        uri.scheme = 'http'
        if uri.host.to_s.empty?
          uri.host = 'gripsdb.dimdi.de'
        end
        ODDB.logger.debug('PharmNet') {
          sprintf('Downloading %s for %s from %s', key, term, uri.to_s) }
        file = agent.get uri.to_s
        file.save path
        ODDB.logger.debug('PharmNet') {
          sprintf('Saving %s for %s in %s', key, term, path) }
        io = StringIO.new(file.body)
      else
        ODDB.logger.debug('PharmNet') {
          sprintf('Reading %s for %s from %s', key, term, path) }
        io = File.open(path)
      end
      term = term.downcase.gsub(/[\s-]/, '.')
      while !term.empty? && chapters.size < 4
        ODDB.logger.debug('PharmNet') {
          sprintf('Parsing %s with term: %s', key, term) }
        io.rewind
        new = pklass.new(term).import io
        chapters = new.chapters
        # drop the last dot-separated part of the term for the next attempt
        term = term.gsub(/(\A|\.)[^.]*$/, '')
      end
    ensure
      # release the archive file handle, also when download/parsing fails
      # NOTE(review): assumes the parsed document no longer reads from io
      io.close if io && !io.closed?
    end
    # an empty term means the parser never ran
    return doc if new.nil?

    ## ensure that chapter-headings are bold
    new.chapters.each { |chapter|
      if((paragraph = chapter.paragraphs.first) \
         && (format = paragraph.formats.first))
        format.augment "b"
      end
    }
    new.source = url
    if doc
      doc.chapters.replace chapters
      doc.save
    else
      doc = new
    end
  end
  doc
end
872
+ def import_substance(name)
873
+ substance = Drugs::Substance.find_by_name name
874
+ unless(substance)
875
+ substance = Drugs::Substance.new
876
+ substance.name.de = name
877
+ substance.save
878
+ end
879
+ substance
880
+ end
881
+ def import_unit(name)
882
+ unit = Drugs::Unit.find_by_name name
883
+ unless unit
884
+ unit = Drugs::Unit.search_by_name(name).find do |unt|
885
+ sim = ngram_similarity name, unt.name.de
886
+ sim > 0.75
887
+ end
888
+ if unit
889
+ unit.name.add_synonym name
890
+ unit.save
891
+ end
892
+ end
893
+ unless unit
894
+ unit = Drugs::Unit.new
895
+ unit.name.de = name
896
+ unit.save
897
+ end
898
+ unit
899
+ end
900
+ def ngram_similarity(str1, str2, n=5)
901
+ str1 = u(str1).downcase.gsub(/[\s,.\-\/]+/, '')
902
+ str2 = u(str2).downcase.gsub(/[\s,.\-\/]+/, '')
903
+ if(str1.length < str2.length)
904
+ str1, str2 = str2, str1
905
+ end
906
+ parts = [ str1.length - n, 0 ].max + 1
907
+ count = 0
908
+ parts.times { |idx|
909
+ if(str2.include? str1[idx, n])
910
+ count += 1
911
+ end
912
+ }
913
+ count.to_f / parts
914
+ end
915
# Builds a Drugs::Dose from a string such as "20.5mg".
#
# The quantity is the leading number of the string; it may be written with
# or without a decimal point ("20.5", "20.", ".5" or "20"). Previously a
# decimal point was required, so "100mg" was read as 0. The unit is the
# trailing run of characters that are neither digits nor dots, passed on
# as found (nil if there is none).
#
# str - the dose String.
#
# Returns a new Drugs::Dose; the quantity is 0.0 if no leading number is
# found.
def parse_dose(str)
  qty = str[/^\d+(\.\d*)?|^\.\d*/].to_f
  unit = str[/[^\d\.]+$/]
  Drugs::Dose.new(qty, unit)
end
918
+ def process(agent, sequence, opts = { :replace => false,
919
+ :reload => false,
920
+ :remove => false,
921
+ :repair => false,
922
+ :reparse => false,
923
+ :reparse_patinfo => false,
924
+ :retries => 3,
925
+ :retry_unit => 60 })
926
+
927
+ return(reparse_fachinfo agent, sequence) if opts[:reparse] && !opts[:reparse_patinfo]
928
+ return(reparse_patinfo agent, sequence) if opts[:reparse_patinfo]
929
+ return unless sequence.fachinfo.empty? || sequence.patinfo.empty? \
930
+ || opts[:replace] || opts[:remove]
931
+ data = identify_details(agent, sequence_name(sequence), sequence, opts)
932
+
933
+ return(remove_infos sequence, opts) unless data
934
+
935
+ cutoff = composition_relevance(sequence.active_agents, data)
936
+ return(remove_infos sequence, opts) if(cutoff <= 1.25) # arbitrary value
937
+
938
+ assign_info(:fachinfo, agent, data, sequence, opts)
939
+ assign_info(:patinfo, agent, data, sequence, opts)
940
+
941
+ fix_composition sequence.active_agents, data if(opts[:repair])
942
+
943
+ # assign registration number if really good match
944
+ return if(cutoff < 2) # arbitrary value
945
+ assign_registration sequence, data[:registration]
946
+ rescue Timeout::Error, StandardError => error
947
+ ODDB.logger.error('PharmNet') {
948
+ sprintf("%s: %s", error.class, error.message) << "\n" << error.backtrace.join("\n")
949
+ }
950
+ (@errors[error.message[0,42]] ||= []).push [ sequence_name(sequence),
951
+ error.message, error.backtrace.find { |ln| /pharmnet/.match ln }.to_s.strip ]
952
+ end
953
# Removes the German document of the given kind from +sequence+, or merely
# counts it as kept.
#
# key      - :fachinfo or :patinfo; the accessor called on +sequence+.
# sequence - the sequence whose info is inspected.
# opts     - Hash; the document is only removed when opts[:remove] is set.
#
# When a German document exists it is either removed (counted in
# @removed[key], logged, sequence saved) or left in place (counted in
# @not_removed[key]). Nothing happens if there is no German document.
# The log message names the kind actually removed; it used to say
# "Fachinfo" for Patinfos as well.
def remove_info(key, sequence, opts)
  info = sequence.send(key)
  return unless info.de

  if opts[:remove]
    @removed[key] += 1
    ODDB.logger.debug('PharmNet') {
      sprintf('Removing %s from %s', key.to_s.capitalize,
              sequence_name(sequence))
    }
    info.de = nil
    sequence.save
  else
    @not_removed[key] += 1
  end
end
966
# Runs remove_info for the Fachinfo and then the Patinfo of +sequence+,
# returning the result of the Patinfo call.
def remove_infos(sequence, opts)
  [:fachinfo, :patinfo].map { |key| remove_info(key, sequence, opts) }.last
end
970
# Re-imports the German Fachinfo of +sequence+ from its stored source.
#
# Does nothing (and returns nil) when there is no German Fachinfo, when it
# has no source, or when import_rtf yields no document. Otherwise the
# chapters of the stored info are replaced with the freshly parsed ones,
# the info is saved and @reparsed_fis is incremented.
def reparse_fachinfo(agent, sequence)
  info = sequence.fachinfo.de
  return unless info

  source = info.source
  return unless source

  doc = import_rtf(:fachinfo, agent, source, sequence_name(sequence),
                   :reparse => true)
  return unless doc

  @reparsed_fis += 1
  info.chapters.replace doc.chapters
  info.save
end
979
# Re-imports the German Patinfo of +sequence+ from its stored source.
#
# Does nothing (and returns nil) when there is no German Patinfo, when it
# has no source, or when import_rtf yields no document. Otherwise the
# chapters of the stored info are replaced with the freshly parsed ones,
# the info is saved and @reparsed_pis is incremented.
def reparse_patinfo(agent, sequence)
  info = sequence.patinfo.de
  return unless info

  source = info.source
  return unless source

  doc = import_rtf(:patinfo, agent, source, sequence_name(sequence),
                   :reparse => true)
  return unless doc

  @reparsed_pis += 1
  info.chapters.replace doc.chapters
  info.save
end
988
# Assembles the import report as an array of lines.
#
# The first part is a summary built from the counters collected in instance
# variables (assigned / removed / kept infos, created records, reparsed and
# repaired items, number of errors). Unless opts[:skip_totals] is set, all
# Drugs::Sequence records are visited to count how many distinct German
# Fachinfo / Patinfo sources are linked to how many sequences.
#
# For every error key (sorted) a " - <count> x <key>" line is added to the
# summary, and a detail section is appended after the summary: heading,
# optional explanation from ERROR_EXPLANATIONS, and one line per collected
# instance. nil entries of the summary (e.g. skipped totals) are dropped.
def report(opts = {})
  fi_sources = {}
  pi_sources = {}
  fi_count = 0
  pi_count = 0
  with_totals = !opts[:skip_totals]

  if with_totals
    Drugs::Sequence.all do |sequence|
      if (fachinfo = sequence.fachinfo.de)
        fi_count += 1
        fi_sources[fachinfo.source] = true
      end
      if (patinfo = sequence.patinfo.de)
        pi_count += 1
        pi_sources[patinfo.source] = true
      end
    end
  end

  # Both totals stay nil when skipped and are removed by #compact below.
  if with_totals
    fi_total = "Total: #{fi_sources.size} Fachinfos linked to #{fi_count} Sequences"
    pi_total = "Total: #{pi_sources.size} Patinfos linked to #{pi_count} Sequences"
  end
  error_count = @errors.values.inject(0) { |sum, errs| sum + errs.size }

  lines = [
    @checked,
    "",
    "Assigned #{@assigned[:fachinfo]} Fachinfos",
    "Removed #{@removed[:fachinfo]} Fachinfos",
    "Kept #{@not_removed[:fachinfo]} unconfirmed Fachinfos",
    fi_total,
    "",
    "Assigned #{@assigned[:patinfo]} Patinfos",
    "Removed #{@removed[:patinfo]} Patinfos",
    "Kept #{@not_removed[:patinfo]} unconfirmed Patinfos",
    pi_total,
    "",
    "Created #{@products_created} Products",
    "Created #{@sequences_created} Sequences",
    "Created #{@packages_created} Packages",
    "",
    "Reparsed #{@reparsed_fis} Fachinfos",
    "Reparsed #{@reparsed_pis} Patinfos",
    "Repaired #{@repaired} Active Agents",
    "",
    "Errors: #{error_count}",
  ].compact

  details = []
  @errors.sort.each do |key, instances|
    heading = "#{instances.size} x #{key}"
    lines << " - #{heading}"
    details << "" << "#{heading}:"
    explanation = ERROR_EXPLANATIONS[key]
    details << "This means that #{explanation}" if explanation
    details << ''
    instances.each do |name, message, line, link|
      details << sprintf("%s: %s (%s) -> http://gripsdb.dimdi.de%s",
                         name, message, line, link)
    end
  end
  lines.concat details
end
1044
# Fills the 'term' field of the search +form+ with +term+ and submits it,
# returning whatever the submit yields.
def result_page(form, term)
  term_field = form.field('term')
  term_field.value = term
  form.submit
end
1048
# Searches for +term+ (downcased) and returns the detail hashes of all
# results, using @result_cache to avoid repeating a search.
#
# On a cache miss, cache entries whose key sorts before the first three
# characters of the term are dropped, the search form is fetched once and
# reused, and the request runs inside agent.transact. When the result page
# does not belong to the searched term, the agent session is renewed and the
# search repeated once; if it is still invalid an empty array is returned
# directly from this method, without being cached.
def search(agent, term, sequence=nil, opts={})
  query = term.downcase
  @result_cache.fetch(query) do
    prefix = query[0, 3]
    @result_cache.delete_if { |key, _| key < prefix } if prefix

    @search_form ||= get_search_form(agent)
    ## if we need to repair the active agents, we want all results, otherwise
    #  only those that have a Fach- or PatInfo to parse.
    unrestricted = opts[:info_unrestricted] ||
                   (opts[:repair] && sequence &&
                    sequence.active_agents.any? { |act| act.dose.qty == 0 })
    fi_only = unrestricted ? 'NO_RESTRICTION' : 'YES'
    set_fi_only(@search_form, fi_only)

    details = agent.transact do
      page = result_page(@search_form, query)
      found = _search_invalid?(page, query)
      if found
        ODDB.logger.error('PharmNet') do
          sprintf "Searched for '%s' but got result for '%s' - creating new session",
                  query, found
        end
        agent.renew!
        @search_form = get_search_form(agent)
        set_fi_only(@search_form, fi_only)
        page = result_page(@search_form, query)
        # leaves the whole method, bypassing the cache store below
        return [] if _search_invalid?(page, query)
      end
      page.save @latest
      extract_result(agent, page).collect do |data|
        detail_page = get_details(agent, page, data)
        detail = data.merge(extract_details(detail_page))
        detail.delete :href
        detail
      end
    end
    @result_cache.store query, details
  end
end
1090
# Checks whether +page+ is the result page for +term+.
#
# Looks at the last "div.wbsectionsubtitlebar" element of the page:
# * no such element       -> returns '' (truthy, i.e. invalid)
# * text names the term   -> returns nil (valid)
# * text names other term -> returns the "Arzneimittelname:..." part of the
#                            text up to the first "?"
def _search_invalid?(page, term)
  title_bar = (page / "div.wbsectionsubtitlebar").last
  return '' if title_bar.nil?

  expected = /Arzneimittelname:\s#{Regexp.escape(term)}\?/i
  return nil if expected.match(title_bar.inner_text)

  title_bar.inner_text[/Arzneimittelname:[^?]+/]
end
1098
# Returns the German name of +sequence+, falling back to the German name of
# its product. Returns nil for a nil sequence or when neither is available.
def sequence_name(sequence)
  return unless sequence

  own_name = sequence.name.de
  return own_name if own_name

  product = sequence.product
  product.name.de if product
end
1107
# Checks every radio button of +form+ that is named "WFTYP" and carries the
# value +status+. Returns the form's radio button collection.
def set_fi_only(form, status="YES")
  form.radiobuttons.each do |button|
    button.check if button.name == "WFTYP" && button.value == status
  end
end
1114
# Picks the best matching entries of +selection+ for +comparison+.
#
# Every entry is scored through _suitable_data; entries for which that
# returns nil/false are discarded. The score of an entry is the sum of the
# returned values, and all entries sharing the highest score are returned in
# their original order. The highest score starts out at 0, so entries are
# only returned when their score equals the maximum found.
def suitable_data(comparison, selection, opts = {})
  ODDB.logger.debug('PharmNet') {
    "Checking for suitable data in #{selection.size} results"
  }
  best = 0
  scored = []
  selection.each do |candidate|
    dists = _suitable_data(candidate, comparison, opts)
    next unless dists
    score = dists.inject { |a, b| a + b }
    best = score if score > best
    scored.push [score, candidate]
  end
  ODDB.logger.debug('PharmNet') {
    "Found a preselection of #{scored.size} results"
  }
  result = scored.select { |score, _| score == best }.collect { |_, candidate| candidate }
  ODDB.logger.debug('PharmNet') {
    "Returning the best #{result.size} results"
  }
  result
end
1143
# Scores one search result against +comparison+.
#
# data       - Hash; data[:data] is the list of strings to compare (index 0
#              is treated as the name, index 1 as the galenic form) and
#              data[:composition] is only consulted for opts[:subcount].
# comparison - list of strings compared position by position with data[:data].
# opts       - :cutoff    minimum similarity per position (defaults to 0.25;
#                         the default is written back into +opts+)
#              :keep_dose when set, the dose / galenic form are not stripped
#                         from the names before comparing
#              :subcount  expected number of composition entries
#
# Returns the list of similarities, or nil as soon as one position falls
# below the cutoff. With :subcount, 0 is appended when the composition size
# matches and nil is returned when it does not.
#
# Both sides are normalized before they are compared. Earlier the
# normalized copy of +comparison+ was built but the untouched original was
# compared, so a dose or a "...tabletten" suffix on the comparison side
# lowered the similarity.
def _suitable_data(data, comparison, opts)
  opts[:cutoff] ||= 0.25
  raw = data[:data].dup
  comp = comparison.dup

  unless opts[:keep_dose]
    # strip the words of the galenic form and any "<n> mg" / "<n> g"
    # (optionally "/ <n> h") dose from both names
    part = Regexp.escape(raw[1].to_s).gsub('\ ', ')|(')
    ptrn = /(#{part})|(\b\d+\s*m?g(\s*\/\s*\d+\s*h)?)[\-\s]*/i
    raw[0] = raw[0].gsub(ptrn, '')
    comp[0] = comp[0].gsub(ptrn, '')
  end

  # reduce e.g. "Filmtabletten" to "Film"
  tabl = /([a-z]{4,})tab.*/i
  raw[1] = raw[1].to_s.gsub(tabl, '\1')
  # Import::Csv::ProductInfos passes a comparison without Galenic Form if
  # no suitable data is found on the first try
  comp[1] = comp[1].to_s.gsub(tabl, '\1') if comp[1]

  dists = raw.zip(comp).collect { |str, othr|
    str = str.to_s
    # a missing counterpart is compared with the string itself
    other = othr ? othr.to_s : str
    relevance = ngram_similarity str.gsub(@stop, ''), other.gsub(@stop, '')
    return if relevance < opts[:cutoff]
    relevance
  }

  subcount = opts[:subcount]
  return dists unless subcount

  composition = data[:composition]
  cdist = composition ? (subcount - composition.size).abs : subcount
  dists.push(cdist) unless cdist > 0
end
1180
# True unless the string form of +code+ has a line starting with "EU".
def unique_registration?(code)
  code.to_s !~ /^EU/
end
1183
+ end
1184
+ end
1185
+ end
1186
+ end