oddb2xml 2.0.5 → 2.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,15 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 4e7b29934682850002244797289099295579fb51
4
- data.tar.gz: a3d02cc3c125315dc26938cb6ce77ec033f98561
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MDA5ZTJmMjQ1NzlhMWM0ZjE5YzY5NjA2NWFjNGFlYjQxMjI0NjIzYw==
5
+ data.tar.gz: !binary |-
6
+ MmUxZWYwYTBiYjkyMjdiMGMzM2QwZjQwOWI5MTZjZTIxMmQ2YzhhYw==
5
7
  SHA512:
6
- metadata.gz: 035cfd64bf3ffffa1a1e0b99c41331326dbb4cf181033c73491c89d81a785f48033e269cd004a262ba646996374c2807229960a140b9b839dca9c2adca528995
7
- data.tar.gz: 2f50567e9e4d2e84e1bbf1ca34ae185d1b7b91f323772bde33e29059e56d7d99a2d552d8e40dc1f2d15a0eb7143d60db64d8ace9caf6b42641f1340437c70391
8
+ metadata.gz: !binary |-
9
+ MDBiZDcyMzQyZTMzMDhiNDFmNGQxOTAwOTc2MjA1OTY4NjMyNzhiNWE3NzI4
10
+ ODVlM2RmMTg2MTRkODdjODhlM2ExNDVkOTUyOTgwZTM3MzY1MzA1Y2Q2MmRi
11
+ NjI4Zjc0OTlkM2M0NTIzMmZlOTlmYzcwMGQ0YTdlMTc5NjZiNjY=
12
+ data.tar.gz: !binary |-
13
+ OGY4NThmMTRhNDVhY2ZlYzdmZmYwMThjMGY0NTM5MmJhYjMxYThjOTE4ZTUy
14
+ MmYwYzlhOWI5OTQ5OWQ0MDllMjk2YmM3OGY3YzUzMDE4MTM0YTYzOTRlY2Uz
15
+ NzM0NTQwZTlhOWQwZjMwZDlmMGYzMWM2ZmIzZDhlZjlmMDE1MjQ=
data/Gemfile.lock CHANGED
@@ -1,10 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- oddb2xml (2.0.5)
4
+ oddb2xml (2.0.6)
5
5
  archive-tar-minitar (~> 0.5.2)
6
6
  mechanize (~> 2.5.1)
7
7
  nokogiri (~> 1.5.10)
8
+ parslet (~> 1.7.0)
8
9
  rubyXL (~> 3.3.1)
9
10
  rubyzip (~> 1.1.3)
10
11
  savon (~> 2.4.0)
@@ -14,11 +15,12 @@ PATH
14
15
  GEM
15
16
  remote: https://rubygems.org/
16
17
  specs:
17
- addressable (2.3.6)
18
+ addressable (2.3.8)
18
19
  akami (1.2.2)
19
20
  gyoku (>= 0.4.0)
20
21
  nokogiri
21
22
  archive-tar-minitar (0.5.2)
23
+ blankslate (3.1.3)
22
24
  builder (3.2.2)
23
25
  coderay (1.1.0)
24
26
  columnize (0.9.0)
@@ -29,15 +31,16 @@ GEM
29
31
  debugger-linecache (~> 1.2.0)
30
32
  debugger-ruby_core_source (~> 1.3.5)
31
33
  debugger-linecache (1.2.0)
32
- debugger-ruby_core_source (1.3.7)
34
+ debugger-ruby_core_source (1.3.8)
33
35
  diff-lcs (1.2.5)
34
- domain_name (0.5.23)
36
+ domain_name (0.5.24)
35
37
  unf (>= 0.0.5, < 1.0.0)
36
38
  gyoku (1.1.1)
37
39
  builder (>= 2.1.2)
38
40
  httpi (2.1.1)
39
41
  rack
40
42
  rubyntlm (~> 0.3.2)
43
+ json (1.8.2)
41
44
  mechanize (2.5.1)
42
45
  domain_name (~> 0.5, >= 0.5.1)
43
46
  mime-types (~> 1.17, >= 1.17.2)
@@ -53,6 +56,8 @@ GEM
53
56
  nokogiri (1.5.11)
54
57
  nori (2.3.0)
55
58
  ntlm-http (0.1.1)
59
+ parslet (1.7.0)
60
+ blankslate (>= 2.0, <= 4.0)
56
61
  pry (0.10.1)
57
62
  coderay (~> 1.1.0)
58
63
  method_source (~> 0.8.1)
@@ -63,20 +68,22 @@ GEM
63
68
  rack (1.6.0)
64
69
  rake (10.4.2)
65
70
  rdoc (4.2.0)
66
- rspec (3.1.0)
67
- rspec-core (~> 3.1.0)
68
- rspec-expectations (~> 3.1.0)
69
- rspec-mocks (~> 3.1.0)
70
- rspec-core (3.1.7)
71
- rspec-support (~> 3.1.0)
72
- rspec-expectations (3.1.2)
71
+ json (~> 1.4)
72
+ rspec (3.2.0)
73
+ rspec-core (~> 3.2.0)
74
+ rspec-expectations (~> 3.2.0)
75
+ rspec-mocks (~> 3.2.0)
76
+ rspec-core (3.2.3)
77
+ rspec-support (~> 3.2.0)
78
+ rspec-expectations (3.2.1)
73
79
  diff-lcs (>= 1.2.0, < 2.0)
74
- rspec-support (~> 3.1.0)
75
- rspec-mocks (3.1.3)
76
- rspec-support (~> 3.1.0)
77
- rspec-support (3.1.2)
80
+ rspec-support (~> 3.2.0)
81
+ rspec-mocks (3.2.1)
82
+ diff-lcs (>= 1.2.0, < 2.0)
83
+ rspec-support (~> 3.2.0)
84
+ rspec-support (3.2.2)
78
85
  ruby-ole (1.2.11.8)
79
- rubyXL (3.3.6)
86
+ rubyXL (3.3.8)
80
87
  nokogiri (>= 1.4.4)
81
88
  rubyzip (>= 1.1.6)
82
89
  rubyntlm (0.3.4)
@@ -93,16 +100,16 @@ GEM
93
100
  sax-machine (0.1.0)
94
101
  nokogiri (> 0.0.0)
95
102
  slop (3.6.0)
96
- spreadsheet (1.0.0)
103
+ spreadsheet (1.0.3)
97
104
  ruby-ole (>= 1.0)
98
105
  unf (0.1.4)
99
106
  unf_ext
100
- unf_ext (0.0.6)
107
+ unf_ext (0.0.7.1)
101
108
  wasabi (3.2.3)
102
109
  httpi (~> 2.0)
103
110
  mime-types (< 2.0.0)
104
111
  nokogiri (>= 1.4.0)
105
- webmock (1.20.4)
112
+ webmock (1.21.0)
106
113
  addressable (>= 2.3.6)
107
114
  crack (>= 0.3.2)
108
115
  webrobots (0.1.1)
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ === 2.0.6 / 27.04.2015
2
+
3
+ * Remove trailing spaces in names imported from ZurRose transfer.dat
4
+ * Use ISO-8859-14 when reading ZurRose transfer.dat to avoid unreadable characters, e.g. Ethacridin
5
+
1
6
  === 2.0.5 / 31.03.2015
2
7
 
3
8
  * --calc adds boolean flag is_active_agent
@@ -0,0 +1,50 @@
1
+ h1. Dokumentation für oddb2xml --calc
2
+
3
+ Wenn man oddb2xml mit der Option @--calc@ aufruft, wird eine Datei @oddb_calc.xml@ erstellt, welche aufgrund diverser offener Datenquellen (u.a. "Excel-Version Zugelassene Verpackungen":https://www.swissmedic.ch/arzneimittel/00156/00221/00222/00230/index.html?lang=de ) die Zusammensetzung aller in der Schweiz öffentlich zugelassenen Medikamente im XML-Format enthält.
4
+
5
+ h2. Ziel
6
+
7
+ Die Swissmedic verpackt in der Excel-Datei für die Packungsbeschreibung in der Spalte Q @Zusammensetzung@ eine Menge an Informationen darüber, aus welchen Bestandteilen ein Medikament besteht. Zu einem grossen Teil folgt dies einer Syntax, für welche keine öffentlich zugängliche Dokumentation zu bestehen scheint. Deshalb wurde für das Projekt ODDB.org anfangs 2015 beschlossen, die Analyse dieses Feldes von einem auf regulären Ausdrücken (wie SW-Ingenieure sie nennen) basierenden Algorithmus auf einen echten Parser umzustellen.
8
+
9
+ In diesem Dokument versuchen wir die dabei erkannten Elemente kurz zu dokumentieren und Grenzfälle und Entscheide anhand von Beispielen (mit IKSNR und Name identifiert) zu begründen.
10
+
11
+
12
+ h2. Syntax
13
+
14
+ Die Syntax wird in einer Datei "compositions_syntax":https://raw.githubusercontent.com/ngiger/oddb2xml/master/lib/oddb2xml/compositions_syntax.rb beschrieben.
15
+
16
+ * ratio: Falls eine Zeile in der Zusammensetzung so etwas wie @ratio: 1:10@ oder @ratio: 1:1.5-2.4@ enthält, wird das Feld "more_info" entsprechend gesetzt.
17
+
18
+ * Namen mit Zahlen: Es wurde beschlossen, dass @Glyceroli Monostearas 33-45@ ein Name ist und nicht Glyceroli Monostearas von 33 bis 45 von irgendwas.
19
+
20
+ h2. Gebrauchte Abkürzungen und Schlüsselworte
21
+
22
+ * q.s.
23
+ * pro praeparatione
24
+ * excipiens
25
+ * pro compresso obducto
26
+ * pro compresso
27
+ * epro praeparatione
28
+ * ad pulverem
29
+ * pro charta
30
+ * ad globulos
31
+ * aqua ad iniectabilia q.s. ad solutionem
32
+ * solvens (i.v.): aqua ad iniectabilia
33
+ * ad solutionem
34
+ * q.s. ad
35
+ * aqua q.s. ad
36
+ * saccharum ad
37
+ * aether q.s.
38
+ * aqua ad iniectabilia
39
+ * q.s. pro praeparatione
40
+ * ana partes
41
+ * et oder @,@ trennen Substanzen
42
+ * ut: vorhergehende Substanz wird via nachstehend aufgeführte Salz(e) aufgenommen
43
+
44
+
45
+ h2. Vorschlag für Bereinigung
46
+
47
+ * SwissmedicErrorHandler
48
+ * Corresp: als Label für Bestandteile
49
+ * corresp. für zugehörende Substanz
50
+ * @<@ durch @max.@ ersetzen (1 Mal)
@@ -647,6 +647,10 @@ module Oddb2xml
647
647
  next unless row and row.cells[0] and row.cells[0].value and row.cells[0].value.to_i > 0
648
648
  iksnr = "%05i" % row.cells[0].value.to_i
649
649
  seqnr = "%02d" % row.cells[1].value.to_i
650
+ if row_nr % 250 == 0
651
+ puts "#{Time.now}: At row #{row_nr} iksnr #{iksnr}";
652
+ $stdout.sync
653
+ end
650
654
  no8 = sprintf('%05d',row.cells[0].value.to_i) + sprintf('%03d',row.cells[10].value.to_i)
651
655
  name = row.cells[2].value
652
656
  atc_code = row.cells[5] ? row.cells[5].value : nil
@@ -680,12 +684,13 @@ module Oddb2xml
680
684
  xml.COMPOSITIONS {
681
685
  info.compositions.each { |composition|
682
686
  xml.COMPOSITION {
683
- # xml.SOURCE composition.source # emit this if you want to debug the results
687
+ xml.CORRESP composition.corresp if composition.corresp
684
688
  xml.LABEL composition.label if composition.label
685
689
  xml.LABEL_DESCRIPTION composition.label_description if composition.label_description
686
690
  xml.SUBSTANCES {
687
691
  composition.substances.each { |substance|
688
692
  xml.SUBSTANCE {
693
+ xml.MORE_INFO substance.more_info.gsub('&gt','>').gsub('&amp', '&') if substance.more_info
689
694
  xml.SUBSTANCE_NAME substance.name
690
695
  xml.IS_ACTIVE_AGENT substance.is_active_agent
691
696
  if substance.unit
@@ -693,9 +698,9 @@ module Oddb2xml
693
698
  xml.UNIT substance.unit
694
699
  end
695
700
  if substance.chemical_substance
696
- xml.CHEMICAL_SUBSTANCE substance.chemical_substance
697
- xml.CHEMICAL_QTY substance.chemical_qty
698
- xml.CHEMICAL_UNIT substance.chemical_unit
701
+ xml.CHEMICAL_SUBSTANCE substance.chemical_substance.name
702
+ xml.CHEMICAL_QTY substance.chemical_substance.qty
703
+ xml.CHEMICAL_UNIT substance.chemical_substance.unit
699
704
  end
700
705
  }
701
706
  }
data/lib/oddb2xml/calc.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require 'oddb2xml/util'
4
- require 'oddb2xml/parse_compositions'
4
+ require 'oddb2xml/parslet_compositions'
5
5
  require 'yaml'
6
6
 
7
7
  module Oddb2xml
data/lib/oddb2xml/cli.rb CHANGED
@@ -330,6 +330,7 @@ module Oddb2xml
330
330
  lines << Calc.dump_new_galenic_forms
331
331
  lines << Calc.dump_names_without_galenic_forms
332
332
  lines << Calc.report_conversion
333
+ lines << ParseComposition.report
333
334
  end
334
335
  unless @options[:address]
335
336
  LANGUAGES.each do |lang|
@@ -0,0 +1,368 @@
1
+ # encoding: utf-8
2
+
3
+ # This file is shared since oddb2xml 2.0.0 (lib/oddb2xml/parse_compositions.rb)
4
+ # with oddb.org src/plugin/parse_compositions.rb
5
+ #
6
+ # It allows easy parsing of the column P (Zusammensetzung) of the Swissmedic packages.xlsx file
7
+ #
8
+
9
+ require 'parslet'
10
+ require 'parslet/convenience'
11
+ include Parslet
12
+
13
+ class CompositionParser < Parslet::Parser
14
+
15
+ # Single character rules
16
+ rule(:lparen) { str('(') }
17
+ rule(:rparen) { str(')') }
18
+ rule(:comma) { str(',') }
19
+
20
+ rule(:space) { match('\s').repeat(1) }
21
+ rule(:space?) { space.maybe }
22
+
23
+ # Things
24
+ rule(:digit) { match('[0-9]') }
25
+ rule(:digits) { digit.repeat(1) }
26
+ rule(:number) {
27
+ (
28
+ str('-').maybe >> (
29
+ str('0') | (match('[1-9]') >> match('[0-9\']').repeat)
30
+ ) >> (
31
+ (str('*') >> digit.repeat(1)).maybe >>
32
+ (match(['.,^']) >> digit.repeat(1)).repeat(1)
33
+ ).maybe >> (
34
+ match('[eE]') >> (str('+') | str('-')).maybe >> digit.repeat(1)
35
+ ).maybe
36
+ )
37
+ }
38
+ rule(:radio_isotop) { match['a-zA-Z'].repeat(1) >> lparen >> digits >> str('-') >> match['a-zA-Z'].repeat(1-3) >> rparen >>
39
+ ((space? >> match['a-zA-Z']).repeat(1)).repeat(0)
40
+ } # e.g. Xenonum (133-Xe) or yttrii(90-Y) chloridum zum Kalibrierungszeitpunkt
41
+ rule(:ratio_value) { match['0-9:\-\.'].repeat(1) >> space?} # eg. ratio: 1:1, ratio: 1:1.5-2.4., ratio: 1:0.68-0.95
42
+
43
+ # handle stuff like acidum 9,11-linolicum or 2,2'-methylen-bis(6-tert.-butyl-4-methyl-phenolum) specially. it must contain at least one a-z
44
+ rule(:umlaut) { match(['éàèèçïöäüâ']) }
45
+ rule(:identifier_D12) { match['a-zA-Z'] >> match['0-9'].repeat(1) }
46
+ rule(:identifier) { str('A + B') | str('ethanol.') | str('poloxamerum 238') | str('TM:') | str('&') | # TODO: why do we have to hard code these identifiers?
47
+ str('F.E.I.B.A.') | str('LA 25% TM') | str('50/50') | str('polysorbatum ') >> digit >> digit | str('q.s.') |
48
+ digit >> digit.maybe >> space >> str('per centum ') >> str('q.s.').maybe| str('1g/9.6 cm²') |
49
+ str('9 g/L 5.4 ml') |
50
+ str('spag.') | str('spp.') | str('ssp.') | str('deklar.') | # TODO: Sind diese Abkürzung wirklich Teil eines Substanznamens?
51
+ str('ca.') | str('var.') | str('spec.') |
52
+ identifier_D12 | identifier_without_comma | identifier_with_comma
53
+ }
54
+
55
+ rule(:identifier_with_comma) {
56
+ match['0-9,\-'].repeat(0) >> (match['a-zA-Z']|umlaut) >> (match(['_,']).maybe >> (match['0-9a-zA-Z\-\'\/'] | umlaut)).repeat(0)
57
+ }
58
+
59
+ rule(:identifier_without_comma) {
60
+ match['0-9\',\-'].repeat(0) >> (match['a-zA-Z']|umlaut) >> (match(['_']).maybe >> (match['0-9a-zA-Z\-\'\/'] | umlaut)).repeat(0) >>
61
+ lparen >> (rparen.absent? >> any).repeat(1) >> rparen
62
+ }
63
+ rule(:one_word) { match['a-zA-Z'] >> match['0-9'].repeat(1) | match['a-zA-Z'].repeat(1) }
64
+ rule(:in_parent) { lparen >> one_word.repeat(1) >> rparen }
65
+ rule(:words_nested) { one_word.repeat(1) >> in_parent.maybe >> space? >> one_word.repeat(0) }
66
+ # dose
67
+ # 150 U.I. hFSH et 150 U.I. hLH
68
+ rule(:dose_unit) { (str('cm²') |
69
+ str('g/dm²') |
70
+ str('g/l') |
71
+ str('g/L') |
72
+ str('% V/V') |
73
+ str('µg/24 h') |
74
+ str('µg/g') |
75
+ str('µg') |
76
+ str('ng') |
77
+ str('guttae') |
78
+ str('mg/g') |
79
+ str('mg/ml') |
80
+ str('MBq/ml') |
81
+ str('MBq') |
82
+ str('CFU') |
83
+ str('mg') |
84
+ str('Mg') |
85
+ str('kJ') |
86
+ str('G') |
87
+ str('g') |
88
+ str('l') |
89
+ str('µl') |
90
+ str('U. Ph. Eur.') |
91
+ str('ml') |
92
+ str('µmol') |
93
+ str('mmol/l') |
94
+ str('mmol') |
95
+ str('Mio CFU') |
96
+ str('Mio U.I.') |
97
+ str('Mio U.') |
98
+ str('Mio. U.I.') |
99
+ str('Mio. U.') |
100
+ str('Mia. U.I.') |
101
+ str('Mia. U.') |
102
+ str('U. Botox,') | # TODO: Should be U. Botox
103
+ str('U.I. hFSH') |
104
+ str('U.I. hCG') |
105
+ str('U.I. hLH') |
106
+ str('U.I.') |
107
+ str('U./ml') |
108
+ str('U.') |
109
+ str('Mia.') |
110
+ str('Mrd.') |
111
+ str('% m/m') |
112
+ str('% m/m') |
113
+ str('%')
114
+ ).as(:unit) }
115
+ rule(:qty_range) { (number >> space? >> (str('+/-') | str(' - ') | str(' -') | str('-') | str('±') ) >> space? >> number).as(:qty_range) }
116
+ rule(:qty_unit) { dose_qty >> (space >> dose_unit).maybe }
117
+ rule(:dose_qty) { number.as(:qty) }
118
+ rule(:min_max) { str('mind.') | (str('min.') | str('max.') | str('ca.') | str('<') ) >> space? } # TODO: swissmedic should replace mind. -> min.
119
+ # 75 U.I. hFSH et 75 U.I. hLH
120
+ rule(:dose_fsh) { qty_unit >> space >> str('et') >> space >> qty_unit.as(:dose_right) }
121
+ rule(:dose_per) { (digits >> str('/') >> digits).as(:qty)}
122
+ rule(:dose) { dose_fsh |
123
+ dose_per |
124
+ ( min_max.maybe >>
125
+ ( (qty_range >> (space >> dose_unit).maybe) | (qty_unit | dose_qty |dose_unit)) >> space? )
126
+ }
127
+ rule(:dose_with_unit) { min_max.maybe >>
128
+ dose_fsh |
129
+ ( qty_range >> space >> dose_unit |
130
+ dose_qty >> space >> dose_unit
131
+ ) >>
132
+ space?
133
+ }
134
+ rule(:operator) { match('[+]') >> space? }
135
+
136
+ # Grammar parts
137
+ rule(:useage) { (any >> str('berzug:')) | # match Überzug
138
+ str('antiox.:') |
139
+ str('arom.:') |
140
+ str('conserv.:') |
141
+ str('color.:')
142
+ }
143
+ rule(:lebensmittel_zusatz) { str('E').as(:lebensmittel_zusatz) >> space >>
144
+ (digits >> match['(a-z)'].repeat(0,3)).as(:digits) >>
145
+ (space >> dose.as(:dose_lebensmittel_zusatz)).maybe >> space?
146
+
147
+ } # Match Wirkstoffe like E 270
148
+ rule(:der) { (str('DER:') >> space >> digit >> match['0-9\.\-:'].repeat).as(:der) >> space?
149
+ } # DER: 1:4 or DER: 3.5:1 or DER: 6-8:1 or DER: 4.0-9.0:1'
150
+ rule(:forbidden_in_substance_name) {
151
+ useage |
152
+ min_max |
153
+ str('corresp. ca.,') |
154
+ str(', corresp.') |
155
+ str('corresp.') |
156
+ str('ratio:') |
157
+ str('Mio ') |
158
+ str('et ') |
159
+ str('ut ') |
160
+ str('Beutel: ') |
161
+ str('ut alia: ') |
162
+ str('pro dosi') |
163
+ str('pro capsula') |
164
+ str('pro vitroe') |
165
+ (digits.repeat(1) >> space >> str(':')) | # match 50 %
166
+ str('ad globulos') |
167
+ str('ana ') |
168
+ str('ana partes') |
169
+ str('partes') |
170
+ str('ad pulverem') |
171
+ str('ad suspensionem') |
172
+ str('q.s. ad ') |
173
+ str('q.s. pro ') |
174
+ str('ad solutionem') |
175
+ str('ad emulsionem') |
176
+ str('excipiens')
177
+ }
178
+ rule(:name_without_parenthesis) {
179
+ (
180
+ (str('(') | forbidden_in_substance_name).absent? >>
181
+ (radio_isotop | str('> 1000') | str('> 500') | identifier.repeat(1)) >>
182
+ space?
183
+ ).repeat(1)
184
+ }
185
+
186
+ rule(:part_with_parenthesis) { lparen >> ( (lparen | rparen).absent? >> any).repeat(1) >>
187
+ (part_with_parenthesis | rparen >> str('-like:') | rparen ) >> space?
188
+ }
189
+ rule(:name_with_parenthesis) {
190
+ forbidden_in_substance_name.absent? >>
191
+ ((comma | lparen).absent? >> any).repeat(0) >> part_with_parenthesis >>
192
+ (forbidden_in_substance_name.absent? >> (identifier.repeat(1) | part_with_parenthesis | rparen) >> space?).repeat(0)
193
+ }
194
+ rule(:substance_name) { (
195
+ der |
196
+ name_with_parenthesis |
197
+ name_without_parenthesis
198
+ ) >>
199
+ str('pro dosi').maybe >> space?
200
+ }
201
+ rule(:simple_substance) { substance_name.as(:substance_name) >> space? >> dose.as(:dose).maybe}
202
+ rule(:simple_subtance_with_digits_in_name_and_dose) {
203
+ substance_lead.maybe >> space? >>
204
+ (name_without_parenthesis >> space? >> ((digits.repeat(1) >> (str(' %') | str('%')) | digits.repeat(1)))).as(:substance_name) >>
205
+ space >> dose_with_unit.as(:dose)
206
+ }
207
+
208
+
209
+ rule(:pro_dose) { str('pro') >> space >> dose.as(:dose_corresp) }
210
+
211
+ # TODO: what does "ut alia:" imply?
212
+ rule(:substance_ut) {
213
+ (substance_lead.maybe >> simple_substance).as(:substance_ut) >>
214
+ (space? >> (str('pro dosi ut ') | str('ut ') ) >>
215
+ space? >> str('alia:').absent? >>
216
+ (excipiens |
217
+ substance_name >> space? >> str('corresp.') >> space? >> substance_lead.maybe >> space? >> simple_substance |
218
+ simple_substance
219
+ ).as(:for_ut)
220
+ ).repeat(1) >>
221
+ space? # >> str('alia:').maybe >> space?
222
+ }
223
+
224
+ rule(:substance_more_info) { # e.g. "acari allergeni extractum 5000 U.:
225
+ (str('ratio:').absent? >> (identifier|digits) >> space?).repeat(1).as(:more_info) >> space? >> (str('U.:') | str(':')| str('.:')) >> space?
226
+ }
227
+
228
+ rule(:dose_pro) { (
229
+ str('excipiens ad solutionem pro ') |
230
+ str('aqua q.s. ad gelatume pro ') |
231
+ str('aqua q.s. ad solutionem pro ') |
232
+ str('aqua q.s. ad suspensionem pro ') |
233
+ str('q.s. ad pulverem pro ') |
234
+ str('doses pro vase ') |
235
+ str('pro vase ') |
236
+ str('excipiens ad emulsionem pro ') |
237
+ str('excipiens ad pulverem pro ') |
238
+ str('aqua ad iniectabilia q.s. ad solutionem pro ')
239
+ ) >> dose.as(:dose_pro) >> space? >> ratio.as(:ratio).maybe
240
+ }
241
+
242
+ rule(:excipiens) { (dose_pro |
243
+ str('excipiens pro compresso obducto') |
244
+ str('excipiens pro compresso') |
245
+ str('excipiens pro praeparatione') |
246
+ str('excipiens') |
247
+ str('ad pulverem') |
248
+ str('pro charta') |
249
+ str('ad globulos') |
250
+ str('aqua ad iniectabilia q.s. ad solutionem') |
251
+ str('solvens (i.v.): aqua ad iniectabilia') |
252
+ str('ad solutionem') |
253
+ str('q.s. ad') |
254
+ str('aqua q.s. ad') |
255
+ str('saccharum ad') |
256
+ str('aether q.s.') |
257
+ str('pro vitro') |
258
+ str('aqua ad iniectabilia') |
259
+ str('pro praeparatione') |
260
+ str('q.s. pro praeparatione') |
261
+ str('ana partes')
262
+ ) >> space? >>
263
+ ( any.repeat(0) )
264
+ }
265
+
266
+ rule(:substance_lead) { useage.as(:more_info) >> space? |
267
+ str('Beutel:').as(:more_info) >> space? |
268
+ str('residui:').as(:more_info) >> space? |
269
+ str('mineralia').as(:mineralia) >> str(':') >> space? |
270
+ str('Solvens:').as(:solvens) >> space? |
271
+ substance_more_info
272
+ }
273
+ rule(:corresp_substance_label) {
274
+ str(', corresp. ca.,') |
275
+ str('corresp. ca.,') |
276
+ str('corresp.') |
277
+ str('corresp., ') |
278
+ str(', corresp.')
279
+ }
280
+
281
+ rule(:corresp_substance) {
282
+ (corresp_substance_label) >> space? >>
283
+ (
284
+ simple_substance.as(:substance_corresp) |
285
+ dose.as(:dose_corresp_2)
286
+ )
287
+ }
288
+
289
+ rule(:ratio) { str('ratio:') >> space >> ratio_value }
290
+
291
+ rule(:solvens) { (str('Solvens:') | str('Solvens (i.m.):'))>> space >> (any.repeat).as(:solvens) >> space? >>
292
+ (substance.as(:substance) >> str('/L').maybe).maybe >>
293
+ any.maybe
294
+ }
295
+ rule(:substance) {
296
+ simple_subtance_with_digits_in_name_and_dose |
297
+ useage.as(:more_info) >> space? >> excipiens |
298
+ ratio.as(:ratio) |
299
+ solvens |
300
+ der >> corresp_substance.maybe |
301
+ (str('potenziert mit:') >> space).maybe >> excipiens.as(:excipiens) |
302
+ substance_ut |
303
+ substance_lead.maybe >> space? >> lebensmittel_zusatz |
304
+ substance_lead.maybe >> space? >> simple_substance >> corresp_substance.maybe >> space? >> corresp_substance.maybe >> space? >> dose_pro.maybe >> str('pro dosi').maybe
305
+ }
306
+ rule(:histamin) { str('U = Histamin Equivalent Prick').as(:histamin) }
307
+ rule(:praeparatio){ ((one_word >> space?).repeat(1).as(:description) >> str(':') >> space?).maybe >>
308
+ (name_with_parenthesis | name_without_parenthesis).repeat(1).as(:substance_name) >>
309
+ number.as(:qty) >> space >> str('U.:') >> space? >>
310
+ ((identifier >> space?).repeat(1).as(:more_info) >> space?).maybe
311
+ }
312
+ rule(:substance_separator) { (str(', et ') | comma | str('et ') | str('ut alia: ')) >> space? }
313
+ rule(:one_substance) { (praeparatio | histamin | substance).as(:substance) >> space? >> ratio.as(:ratio).maybe }
314
+ # rule(:one_substance) { (substance_ut).as(:substance) } # >> str('.').maybe }
315
+ rule(:all_substances) { (one_substance >> substance_separator.maybe).repeat(1) }
316
+ rule(:composition) { all_substances }
317
+ rule(:long_labels) {
318
+ str('Praeparatio sicca cum solvens: praeparatio sicca:') |
319
+ str('Praeparatio cryodesiccata') >> (str(':').absent? >> any).repeat(0) >> str(':') |
320
+ str('Tela cum praeparatione (Panel ') >> digit >> str('):')
321
+ }
322
+ rule(:label_id) {
323
+ (
324
+ str('V') |
325
+ str('IV') |
326
+ str('III') |
327
+ str('II') |
328
+ str('I') |
329
+ str('A') |
330
+ str('B') |
331
+ str('C') |
332
+ str('D') |
333
+ str('E')
334
+ )
335
+ }
336
+ rule(:label_separator) { (str('):') | str(')')) }
337
+ rule(:label) { label_id.as(:label) >> space? >>
338
+ label_separator >> str(',').absent? >>
339
+ (space? >> (match(/[^:]/).repeat(0)).as(:label_description) >> str(':') >> space).maybe
340
+ }
341
+ rule(:leading_label) { label_id >> label_separator >> (str(' et ') | str(', ') | str(' pro usu: ') | space) >>
342
+ label_id >> label_separator >> any.repeat(1) |
343
+ long_labels.as(:label) |
344
+ label
345
+ }
346
+ rule(:corresp_label) {
347
+ str('doses ') |
348
+ str('Pulver: ') |
349
+ str('Diluens: ') |
350
+ str('Solvens (i.v.): ') |
351
+ str('Solvens (i.m.): ') |
352
+ str('Solvens: ') |
353
+ str('Solutio reconstituta:') |
354
+ str('Corresp., ') |
355
+ str('Corresp. ') |
356
+ str('corresp. ')
357
+ }
358
+ rule(:corresp_line) { corresp_label >> any.repeat(1).as(:corresp) |
359
+ ((label_id >> label_separator >> space? >> str('et ').maybe).repeat(1) >> any.repeat(1)).as(:corresp)
360
+ }
361
+
362
+ rule(:expression_comp) {
363
+ leading_label.maybe >> space? >> composition.as(:composition) >> space? >> str('.').maybe >> space? |
364
+ corresp_line
365
+ }
366
+ root :expression_comp
367
+ end
368
+
@@ -456,16 +456,25 @@ module Oddb2xml
456
456
  # see http://dev.ywesee.com/Bbmb/TransferDat
457
457
  def initialize(dat, extended = false)
458
458
  @@extended = extended
459
- @@error_file ||= File.open(File.join(WorkDir, "duplicate_ean13_from_zur_rose.txt"), 'w+')
459
+ @@error_file ||= File.open(File.join(WorkDir, "duplicate_ean13_from_zur_rose.txt"), 'wb+:ISO-8859-14')
460
460
  @@items_without_ean13s ||= 0
461
461
  @@duplicated_ean13s ||= 0
462
462
  @@zur_rose_items ||= 0
463
- @io = StringIO.new(dat) if dat
463
+ if dat
464
+ if File.exists?(dat)
465
+ @io = File.open(dat, 'rb:ISO-8859-14')
466
+ else
467
+ @io = StringIO.new(dat)
468
+ end
469
+ @io
470
+ else
471
+ nil
472
+ end
464
473
  end
465
474
  def to_hash
466
475
  data = {}
467
476
  while line = @io.gets
468
- line = line.chomp
477
+ line = line.encode('utf-8').gsub("\u0089", "‰").gsub("\u0092", '’').gsub("\u0096", '-').chomp
469
478
  next if line =~ /(ad us\.* vet)|(\(vet\))/i
470
479
  if @@extended
471
480
  next unless line =~ /(\d{13})(\d{1})$/
@@ -490,7 +499,7 @@ module Oddb2xml
490
499
  :line => line.chomp,
491
500
  :ean => ean13,
492
501
  :vat => line[96],
493
- :description => line[10..59], # .sub(/\s+$/, ''),
502
+ :description => line[10..59].sub(/\s+$/, ''),
494
503
  :additional_desc => '',
495
504
  :pharmacode => pharma_code,
496
505
  :price => sprintf("%.2f", line[60,6].gsub(/(\d{2})$/, '.\1').to_f),