ydocx 1.0.7 → 1.0.8

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,11 @@
1
+ === 1.0.8 / 04.05.2012
2
+
3
+ * Removed unnecessary `when` branch
4
+ * Updated help message
5
+ * Updated whitespace stripping at line head
6
+ * Added version option to commands
7
+ * Improved chapter matches for fi/pi templates
8
+
1
9
  === 1.0.7 / 04.05.2012
2
10
 
3
11
  * Updated chapter list style for fachinfo/patinfo
data/Manifest.txt CHANGED
@@ -4,7 +4,6 @@ README.txt
4
4
  Rakefile
5
5
  bin/docx2html
6
6
  bin/docx2xml
7
- lib/version.rb
8
7
  lib/ydocx.rb
9
8
  lib/ydocx/builder.rb
10
9
  lib/ydocx/document.rb
data/README.txt CHANGED
@@ -13,9 +13,10 @@
13
13
 
14
14
  == Usage
15
15
 
16
- * Usage: /usr/local/bin/docx2html file [options]
17
- -f, --format Format of style and chapter {(fi|fachinfo)|(pl|plain)}, default is FI.
16
+ * Usage: bin/docx2html file [options]
17
+ -f, --format Format of style and chapter {(fi|fachinfo)|(pi|patinfo)|(pl|plain)|none}, default fachinfo.
18
18
  -h, --help Display this help message.
19
+ -v, --version Show version.
19
20
 
20
21
  == Using the great libraries
21
22
 
data/lib/ydocx/command.rb CHANGED
@@ -1,13 +1,14 @@
1
1
  #!/usr/bin/env ruby
2
2
  # encoding: utf-8
3
3
 
4
- require 'ydocx/document'
4
+ require 'ydocx'
5
5
 
6
6
  module YDocx
7
7
  class Command
8
8
  class << self
9
- @@help = /^\-(h|\-help)$/u
10
- @@format = /^\-(f|\-format)$/u
9
+ @@help = /^\-(h|\-help)$/u
10
+ @@format = /^\-(f|\-format)$/u
11
+ @@version = /^\-(v|\-version)$/u
11
12
  def error(message='')
12
13
  puts message
13
14
  puts "see `#{self.command} --help`"
@@ -18,9 +19,10 @@ module YDocx
18
19
  end
19
20
  def help
20
21
  banner = <<-BANNER
21
- Usage: #{$0} file [options]
22
- -f, --format Format of style and chapter {(fi|fachinfo)|(pl|plain)|none}, default fachinfo.
22
+ Usage: #{self.command} file [options]
23
+ -f, --format Format of style and chapter {(fi|fachinfo)|(pi|patinfo)|(pl|plain)|none}, default fachinfo.
23
24
  -h, --help Display this help message.
25
+ -v, --version Show version.
24
26
  BANNER
25
27
  puts banner
26
28
  exit
@@ -36,6 +38,8 @@ Usage: #{$0} file [options]
36
38
  argv = ARGV.dup
37
39
  if argv.empty? or argv[0] =~ @@help
38
40
  self.help
41
+ elsif argv[0] =~ @@version
42
+ self.version
39
43
  else
40
44
  file = argv.shift
41
45
  path = File.expand_path(file)
@@ -76,6 +80,10 @@ Usage: #{$0} file [options]
76
80
  end
77
81
  end
78
82
  end
83
+ def version
84
+ puts "#{self.command}: version #{VERSION}"
85
+ exit
86
+ end
79
87
  end
80
88
  end
81
89
  end
data/lib/ydocx/parser.rb CHANGED
@@ -31,8 +31,6 @@ module YDocx
31
31
  @result << parse_paragraph(node)
32
32
  when 'tbl'
33
33
  @result << parse_table(node)
34
- when 'pict'
35
- # pending
36
34
  when 'p'
37
35
  @result << parse_paragraph(node)
38
36
  else
@@ -80,9 +78,6 @@ module YDocx
80
78
  end
81
79
  text
82
80
  end
83
- def parse_as_block(r, text)
84
- nil # default no block element
85
- end
86
81
  def optional_escape(text)
87
82
  text.force_encoding('utf-8')
88
83
  # NOTE
@@ -150,6 +145,9 @@ module YDocx
150
145
  #p "char : " + @coder.decode("&#%s;" % code.hex.to_s)
151
146
  end
152
147
  end
148
+ def parse_block(node)
149
+ nil # default no block element
150
+ end
153
151
  def parse_image(r)
154
152
  if pict = r.xpath('w:pict') and
155
153
  shape = pict.xpath('v:shape') and
@@ -180,26 +178,29 @@ module YDocx
180
178
  end
181
179
  def parse_paragraph(node)
182
180
  content = []
183
- line_head = true
184
- pos = 0
185
- node.xpath('w:r').each do |r|
186
- unless r.xpath('w:t').empty?
187
- content << parse_text(r, (pos == 0)) # rm indent
188
- pos += 1
189
- else
190
- unless r.xpath('w:tab').empty?
191
- if content.last != @space and pos != 0 # ignore tab at line head
192
- content << @space
181
+ if block = parse_block(node)
182
+ content << block
183
+ else # as p
184
+ pos = 0
185
+ node.xpath('w:r').each do |r|
186
+ unless r.xpath('w:t').empty?
187
+ content << parse_text(r, (pos == 0)) # rm indent
188
+ pos += 1
189
+ else
190
+ unless r.xpath('w:tab').empty?
191
+ if content.last != @space and pos != 0 # ignore tab at line head
192
+ content << @space
193
+ pos += 1
194
+ end
195
+ end
196
+ unless r.xpath('w:sym').empty?
197
+ code = r.xpath('w:sym').first['char'].downcase # w:char
198
+ content << optional_replace(code)
193
199
  pos += 1
194
200
  end
195
- end
196
- unless r.xpath('w:sym').empty?
197
- code = r.xpath('w:sym').first['char'].downcase # w:char
198
- content << optional_replace(code)
199
- pos += 1
200
- end
201
- unless r.xpath('w:pict').empty?
202
- content << parse_image(r)
201
+ unless r.xpath('w:pict').empty?
202
+ content << parse_image(r)
203
+ end
203
204
  end
204
205
  end
205
206
  end
@@ -241,28 +242,21 @@ module YDocx
241
242
  def parse_text(r, lstrip=false)
242
243
  text = r.xpath('w:t').map(&:text).join('')
243
244
  text = optional_escape(text)
245
+ text = text.lstrip if lstrip
244
246
  if rpr = r.xpath('w:rPr')
245
247
  text = apply_fonts(rpr, text)
246
- if block = parse_as_block(r, text)
247
- block
248
- else
249
- # inline tag
250
- text = text.lstrip if lstrip
251
- text = apply_align(rpr, text)
252
- unless rpr.xpath('w:u').empty?
253
- text = markup(:span, text, {:style => "text-decoration:underline;"})
254
- end
255
- unless rpr.xpath('w:i').empty?
256
- text = markup(:em, text)
257
- end
258
- unless rpr.xpath('w:b').empty?
259
- text = markup(:strong, text)
260
- end
261
- text
248
+ text = apply_align(rpr, text)
249
+ unless rpr.xpath('w:u').empty?
250
+ text = markup(:span, text, {:style => "text-decoration:underline;"})
251
+ end
252
+ unless rpr.xpath('w:i').empty?
253
+ text = markup(:em, text)
254
+ end
255
+ unless rpr.xpath('w:b').empty?
256
+ text = markup(:strong, text)
262
257
  end
263
- else
264
- text
265
258
  end
259
+ text
266
260
  end
267
261
  end
268
262
  end
@@ -6,44 +6,45 @@ require 'cgi'
6
6
  module YDocx
7
7
  class Parser
8
8
  private
9
- def escape_as_id(text)
10
- CGI.escape(text.gsub(/&(.)uml;/, '\1').gsub(/\s*\/\s*|\/|\s+/, '_').gsub(/(\?|_$)/, '').downcase)
9
+ def escape_id(text)
10
+ CGI.escape(text.gsub(/&(.)uml;/, '\1e').gsub(/\s*\/\s*|\s+|\/|\-/, '_').gsub(/\./, '').downcase)
11
11
  end
12
- def parse_as_block(r, text)
13
- text = text.strip
12
+ def parse_block(node)
13
+ text = node.inner_text.strip
14
+ text = optional_escape text
14
15
  # TODO
15
16
  # Franzoesisch
16
17
  chapters = {
17
- 'Dos./Anw.' => /^Dosierung\s*(\/|und)\s*Anwendung/u, # 5
18
- 'Eigensch.' => /^Eigenschaften\s*\/\s*Wirkungen($|\s*\(\s*(ATC\-Code|Wirkungsmechanismus|Pharmakodyamik|Klinische\s+Wirksamkeit)\s*\)\s*$)|^Propri.t.s/iu, # 13
18
+ 'Name' => /^Name\s+des\s+Pr&auml;parates$/u, # 1
19
+ 'Zusammens.' => /^Zusammensetzung($|\s*\/\s*(Wirkstoffe|Hilsstoffe)$)/u, # 2
19
20
  'Galen.Form' => /^Galenische\s+Form\s*(und|\/)\s*Wirkstoffmenge\s+pro\s+Einheit$|^Forme\s*gal.nique/iu, # 3
20
21
  'Ind./Anw.m&ouml;gl.' => /^Indikationen(\s+|\s*(\/|und)\s*)Anwendungsm&ouml;glichkeiten$|^Indications/u, # 4
21
- 'Interakt.' => /^Interaktionen$|^Interactions/u, # 8
22
+ 'Dos./Anw.' => /^Dosierung\s*(\/|und)\s*Anwendung/u, # 5
22
23
  'Kontraind.' => /^Kontraindikationen($|\s*\(\s*absolute\s+Kontraindikationen\s*\)$)/u, # 6
23
- 'Name' => /^Name\s+des\s+Pr&auml;parates$/u, # 1
24
- 'Packungen' => /^Packungen($|\s*\(\s*mit\s+Angabe\s+der\s+Abgabekategorie\s*\)$)/u, # 18
25
- 'Pr&auml;klin.' => /^Pr&auml;klinische\s+Daten$/u, # 15
26
- 'Pharm.kinetik' => /^Pharmakokinetik($|\s*\((Absorption,\s*Distribution,\s*Metabolisms,\s*Elimination\s|Kinetik\s+spezieller\s+Patientengruppen)*\)$)|^Pharmacocin.tique?/iu, # 14
27
- 'Sonstige H.' => /^Sonstige\s*Hinweise($|\s*\(\s*(Inkompatibilit&auml;ten|Beeinflussung\s*diagnostischer\s*Methoden|Haltbarkeit|Besondere\s*Lagerungshinweise|Hinweise\s+f&uuml;r\s+die\s+Handhabung)\s*\)$)|^Remarques/u, # 16
24
+ 'Warn.hinw.' => /^Warnhinweise\s+und\s+Vorsichtsmassnahmen($|\s*\/\s*(relative\s+Kontraindikationen|Warnhinweise\s*und\s*Vorsichtsmassnahmen)$)/u, # 7
25
+ 'Interakt.' => /^Interaktionen$|^Interactions/u, # 8
28
26
  'Schwangerschaft' => /^Schwangerschaft(,\s*|\s*\/\s*|\s+und\s+)Stillzeit$/u, # 9
29
- 'Stand d. Info.' => /^Stand\s+der\s+Information$|^Mise\s+.\s+jour$/iu, # 20
27
+ 'Fahrt&uuml;cht.' => /^Wirkung\s+auf\s+die\s+Fahrt&uuml;chtigkeit\s+und\s+auf\s+das\s+Bedienen\s+von\s+Maschinen$/u, # 10
30
28
  'Unerw.Wirkungen' => /^Unerw&uuml;nschte\s+Wirkungen$/u, # 11
31
29
  '&Uuml;berdos.' => /^&Uuml;berdosierung$|^Surdosage$/u, # 12
32
- 'Warn.hinw.' => /^Warnhinweise\s+und\s+Vorsichtsmassnahmen($|\s*\/\s*(relative\s+Kontraindikationen|Warnhinweise\s*und\s*Vorsichtsmassnahmen)$)/u, # 7
33
- 'Fahrt&uuml;cht.' => /^Wirkung\s+auf\s+die\s+Fahrt&uuml;chtigkeit\s+und\s+auf\s+das\s+Bedienen\s+von\s+Maschinen$/u, # 10
34
- 'Swissmedic-Nr.' => /^Zulassungsnummer($|\s*\(\s*Swissmedic\s*\)$)/u, # 17
30
+ 'Eigensch.' => /^Eigenschaften\s*\/\s*Wirkungen($|\s*\(\s*(ATC\-Code|Wirkungsmechanismus|Pharmakodyamik|Klinische\s+Wirksamkeit)\s*\)\s*$)|^Propri.t.s/iu, # 13
31
+ 'Pharm.kinetik' => /^Pharmakokinetik($|\s*\((Absorption,\s*Distribution,\s*Metabolisms,\s*Elimination\s|Kinetik\s+spezieller\s+Patientengruppen)*\)$)|^Pharmacocin.tique?/iu, # 14
32
+ 'Pr&auml;klin.' => /^Pr&auml;klinische\s+Daten$/u, # 15
33
+ 'Sonstige H.' => /^Sonstige\s*Hinweise($|\s*\(\s*(Inkompatibilit&auml;ten|Beeinflussung\s*diagnostischer\s*Methoden|Haltbarkeit|Besondere\s*Lagerungshinweise|Hinweise\s+f&uuml;r\s+die\s+Handhabung)\s*\)$)|^Remarques/u, # 16
34
+ 'Swissmedic-Nr.' => /^Zulassungsnummer(:|$|\s*\(\s*Swissmedic\s*\)$)/u, # 17
35
+ 'Packungen' => /^Packungen($|\s*\(\s*mit\s+Angabe\s+der\s+Abgabekategorie\s*\)$)/u, # 18
35
36
  'Reg.Inhaber' => /^Zulassungsinhaberin($|\s*\(\s*Firma\s+und\s+Sitz\s+gem&auml;ss\s*Handelsregisterauszug\s*\))/u, # 19
36
- 'Zusammens.' => /^Zusammensetzung($|\s*\/\s*(Wirkstoffe|Hilsstoffe)$)/u, # 2
37
+ 'Stand d. Info.' => /^Stand\s+der\s+Information$|^Mise\s+.\s+jour$/iu, # 20
37
38
  }.each_pair do |chapter, regexp|
38
39
  if text =~ regexp
39
- next if !r.next.nil? and # skip matches in paragraph
40
- r.next.name.downcase != 'bookmarkend'
41
- id = escape_as_id(text)
40
+ # allow without line break
41
+ # next if !node.previous.inner_text.empty? and !node.next.inner_text.empty?
42
+ id = escape_id(chapter)
42
43
  @indecies << {:text => chapter, :id => id}
43
44
  return markup(:h3, text, {:id => id})
44
45
  end
45
46
  end
46
- if r.parent.previous.nil? and @indecies.empty?
47
+ if node.parent.previous.nil? and @indecies.empty?
47
48
  # The first line as package name
48
49
  @indecies << {:text => 'Titel', :id => 'titel'}
49
50
  return markup(:h2, text, {:id => 'titel'})
@@ -6,37 +6,38 @@ require 'ydocx/templates/fachinfo'
6
6
  module YDocx
7
7
  class Parser
8
8
  private
9
- def parse_as_block(r, text)
10
- text = text.strip
9
+ def parse_block(node)
10
+ text = node.inner_text.strip
11
+ text = optional_escape text
11
12
  # TODO
12
13
  # Franzoesisch
13
14
  chapters = {
14
- 'Ab&auml;nderung' => /^Was\s+sollte\s+dazu\s+beachtet\s+werden\s*\??$/u, # 4
15
- 'Dos./Anw.' => /^Wie\s+verwenden\s+Sie\s+\w+\s*\??$/u, # 8
16
- 'Eigensch.' => /^Was\s+ist\s+\w+\s+und\s+wann\s+wird\s+es\s+angewendet\s*\??$/u, # 3
17
- 'Gew&ouml;hnliche H.' => /^Was\s+ist\s+ferner\s+zu\s+beachten\s*\??$/u, # 10
18
- 'Hersteller' => /^Herstellerin$/u, # 15
19
15
  'Information' => /^Information\s+f&uuml;r\sPatientinnen\s+und\s+Patienten$/u, # 1
20
- 'Kontraind.' => /^Wann\s+darf\s+\w+\s+nicht\s+(eingenommen\s*\/\s*angewendet|eingenommen|angewendet)\s*werden\s*\??$/u, # 5
21
16
  'Name' => /^Name\s+des\s+Pr&auml;parates$/u, # 2
22
- 'Packungen' => /^Wo\s+erhalten\s+Sie\s+\w+\s*\?\s*Welche\s+Packungen\s+sind\s+erh&auml;ltlich\s*\??$/u, # 13
17
+ 'Eigensch.' => /^Was\s+ist\s+\w+\s+und\s+wann\s+wird\s+es\s+angewendet\s*\??$/u, # 3
18
+ 'Ab&auml;nderung' => /^Was\s+sollte\s+dazu\s+beachtet\s+werden\s*\??$/u, # 4
19
+ 'Kontraind.' => /^Wann\s+darf\s+\w+\s+nicht\s+(eingenommen\s*\/\s*angewendet|eingenommen|angewendet)\s*werden\s*\??$/u, # 5
20
+ 'Vorbeugung' => /^Wann\s+ist\s+bei\s+der\s+(Einnahme\s*\/\s*Anwendung|Einnahme|Anwendung)\s*von\s+\w+\s+Vorsicht\s+geboten\s*\??$/u, # 6
23
21
  'Schwanderschaft' => /^Darf\s+\w+\s+w&auml;hrend\s+einer\s+Schwangerschaft\s+oder\s+in\s+der\s+Stillzeit\s+(eingenommen\s*\/\s*angewendet|eingenommen|angewendet)\s*werden\s*\??$/u, # 7
24
- 'Stand d. Info.' => /^Diese\sPackungsbeilage\s+wurde\s+im\s+[\.A-z\s0-9]+(\s+|\s*\/\s*\w+\s+\(Monat\s*\/\s*Jahr\)\s*)letztmals\s+durch\s+die\s+Arzneimittelbeh&ouml;rde\s*\(\s*Swissmedic\s*\)\s*gepr&uuml;ft.?$/u, # 16
25
- 'Swissmedic-Nr.' => /^Zulassungsnummer$/u, # 12
22
+ 'Dos./Anw.' => /^Wie\s+verwenden\s+Sie\s+\w+\s*\??$/u, # 8
26
23
  'Unerw.Wirkungen' => /^Welche\s+Nebenwirkungen\s+kann\s+\w+\s+haben\s*\??$/u, # 9
27
- 'Verteiler' => /^Zulassungsinhaberin$/u, # 14
28
- 'Vorbeugung' => /^Wann\s+ist\s+bei\s+der\s+(Einnahme\s*\/\s*Anwendung|Einnahme|Anwendung)\s*von\s+\w+\s+Vorsicht\s+geboten\s*\??$/u, # 6
24
+ 'Gew&ouml;hnliche H.' => /^Was\s+ist\s+ferner\s+zu\s+beachten\s*\??$/u, # 10
29
25
  'Zusammens.' => /^Was\s+ist\s+in\s+\w+\s+enthalten\s*\??$/u, # 11
26
+ 'Swissmedic-Nr.' => /^Zulassungsnummer$/u, # 12
27
+ 'Packungen' => /^Wo\s+erhalten\s+Sie\s+\w+\s*\?\s*Welche\s+Packungen\s+sind\s+erh&auml;ltlich\s*\??$/u, # 13
28
+ 'Verteiler' => /^Zulassungsinhaberin$/u, # 14
29
+ 'Hersteller' => /^Herstellerin$/u, # 15
30
+ 'Stand d. Info.' => /^Diese\sPackungsbeilage\s+wurde\s+im\s+[\.A-z\s0-9]+(\s+|\s*\/\s*\w+\s+\(Monat\s*\/\s*Jahr\)\s*)letztmals\s+durch\s+die\s+Arzneimittelbeh&ouml;rde\s*\(\s*Swissmedic\s*\)\s*gepr&uuml;ft.?$/u, # 16
30
31
  }.each_pair do |chapter, regexp|
31
32
  if text =~ regexp
32
- next if !r.next.nil? and # skip matches in paragraph
33
- r.next.name.downcase != 'bookmarkend'
34
- id = escape_as_id(text)
33
+ # allow without line break
34
+ #next if !node.previous.inner_text.empty? and !node.next.inner_text.empty?
35
+ id = escape_id(chapter)
35
36
  @indecies << {:text => chapter, :id => id}
36
37
  return markup(:h3, text, {:id => id})
37
38
  end
38
39
  end
39
- if r.parent.previous.nil? and @indecies.empty?
40
+ if node.parent.previous.nil? and @indecies.empty?
40
41
  # The first line as package name
41
42
  @indecies << {:text => 'Titel', :id => 'titel'}
42
43
  return markup(:h2, text, {:id => 'titel'})
data/lib/ydocx.rb CHANGED
@@ -4,4 +4,5 @@
4
4
  require 'ydocx/document'
5
5
 
6
6
  module YDocx
7
+ VERSION = '1.0.8'
7
8
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ydocx
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.7
4
+ version: 1.0.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2012-05-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rdoc
16
- requirement: &23032560 !ruby/object:Gem::Requirement
16
+ requirement: &25506240 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '3.10'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *23032560
24
+ version_requirements: *25506240
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: hoe
27
- requirement: &23032140 !ruby/object:Gem::Requirement
27
+ requirement: &25505820 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: '2.13'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *23032140
35
+ version_requirements: *25505820
36
36
  description: ''
37
37
  email:
38
38
  - yasaka@ywesee.com, zdavatz@ywesee.com
@@ -51,7 +51,6 @@ files:
51
51
  - Rakefile
52
52
  - bin/docx2html
53
53
  - bin/docx2xml
54
- - lib/version.rb
55
54
  - lib/ydocx.rb
56
55
  - lib/ydocx/builder.rb
57
56
  - lib/ydocx/document.rb
data/lib/version.rb DELETED
@@ -1,6 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # encoding: utf-8
3
-
4
- module Docx2html
5
- VERSION = "1.0.7"
6
- end