ydocx 1.2.1 → 1.2.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: d110c0230d26d291bbc973e022379d669a7fa963dc0baf27fa3b0ccaaa193bab
4
+ data.tar.gz: f21bfa758230376fadeab73e75645dd06862957c0d8c7d120bd489d7ffeb9b1b
5
+ SHA512:
6
+ metadata.gz: 75d8e7bed0a19192c43c4b75be52b9c5f9da1ef265052420d299b810189faf3b880cc3e5f207621695a6cd8d156c477aed00c87b6e82366c7d8dedc012411e90
7
+ data.tar.gz: 84bb26bab8bc03ea116f90f1df1708d4f646f0b0094a6e685723f2e7448e409a5db8c46845c54d44ee723fd9dd0cf866946d49e8b15f6a0a56d63c01610925cc
@@ -1,3 +1,26 @@
1
+ === 1.2.6 / 19.01.2021
2
+
3
+ * Updated for use with Ruby 3.0.0. RMagick -> rmagick
4
+ * Replaced hoe by bundler/gem_tasks
5
+
6
+ === 1.2.5 / 07.07.2014
7
+
8
+ * Updated to use rubyzip >= 1.0.0
9
+
10
+ === 1.2.4 / 26.02.2014
11
+
12
+ * Many changes by Niklaus Giger to get ready for Pseudo-Fachinformation.
13
+
14
+ === 1.2.3 / 12.07.2012
15
+
16
+ * Updated french char handling, lang in filename
17
+
18
+ === 1.2.2 / 12.07.2012
19
+
20
+ * Fixed typo in french
21
+ * Updated help output
22
+ * Updated help message about --lang option
23
+
1
24
  === 1.2.1 / 11.07.2012
2
25
 
3
26
  * Updated template, option handling for extension
data/README.txt CHANGED
@@ -1,4 +1,4 @@
1
- == ydocx - © ywesee GmbH
1
+ == ydocx by ywesee GmbH
2
2
 
3
3
  * https://github.com/zdavatz/ydocx
4
4
  * Parsing docx files with Ruby and output them as HTML and XML.
@@ -18,17 +18,29 @@
18
18
 
19
19
  == Usage
20
20
 
21
- * Usage: bin/docx2html file [options]
21
+ * Usage: docx2html file [options]
22
22
  -f, --format Format of style and chapter {(fi|fachinfo)|(pi|patinfo)|(pl|plain)|none}, default fachinfo.
23
23
  -h, --help Display this help message.
24
+ -l, --lang Language option for templates {de|fr}
24
25
  -v, --version Show version.
25
26
 
26
27
  == Using the great libraries
27
28
 
28
- * rubyzip
29
+ * rubyzip (>= 1.0.0)
29
30
  * nokogiri
30
31
  * htmlentities
31
- * RMagick
32
+ * rmagick
33
+
34
+ Install them using bundle install.
35
+
36
+ Requires Ruby >= 1.9.2.
37
+
38
+ == TODO
39
+
40
+ For an unknown reason the gem fails to properly parse the Présentation chapter for sinovial_fr.
41
+ Failing spec is: ydocx should convert sinovial_FR to xml
42
+ Failure/Error: doc.xpath('//chapters/chapter[contains(heading, "Présentation")]').size.should > 0
43
+
32
44
 
33
45
  == License GPLv3.0
34
46
 
@@ -1,8 +1,4 @@
1
1
  #!/usr/bin/env ruby
2
- # encoding: utf-8
3
2
 
4
3
  require 'ydocx/document'
5
4
 
6
- module YDocx
7
- VERSION = '1.2.1'
8
- end
@@ -6,8 +6,9 @@ require 'ydocx'
6
6
  module YDocx
7
7
  class Command
8
8
  class << self
9
- @@help = /^\-(h|\-help)$/u
10
9
  @@format = /^\-(f|\-format)$/u
10
+ @@help = /^\-(h|\-help)$/u
11
+ @@lang = /^\-(l|\-lang)$/u
11
12
  @@version = /^\-(v|\-version)$/u
12
13
  def error(message='')
13
14
  puts message
@@ -25,11 +26,57 @@ module YDocx
25
26
  Usage: #{self.command} file [options]
26
27
  -f, --format Format of style and chapter {(fi|fachinfo)|(pi|patinfo)|(pl|plain)|none}, default fachinfo.
27
28
  -h, --help Display this help message.
29
+ -l, --lang Language option for templates {de|fr}
28
30
  -v, --version Show version.
29
31
  BANNER
30
32
  puts banner
31
33
  exit
32
34
  end
35
+ def parse(action, argv)
36
+ if argv.length.odd?
37
+ self.error "#{self.command}: exit with: Invalid option"
38
+ else
39
+ args = Hash[*argv]
40
+ end
41
+ options = {}
42
+ args.each_pair do |option, value|
43
+ if option =~ @@format
44
+ case value
45
+ when 'fi', 'fachinfo'
46
+ require 'ydocx/templates/fachinfo'
47
+ options.merge!({:style => :frame}) if action == :to_html
48
+ when 'pi', 'patinfo'
49
+ require 'ydocx/templates/patinfo'
50
+ options.merge!({:style => :frame}) if action == :to_html
51
+ when 'pl', 'plain'
52
+ options.merge!({:style => true}) if action == :to_html
53
+ when 'none'
54
+ # pass
55
+ else
56
+ self.error "#{self.command}: exit with #{option}: Invalid argument"
57
+ end
58
+ elsif option =~ @@help
59
+ self.help
60
+ elsif option =~ @@lang
61
+ options.merge!({:lang => value})
62
+ elsif option.downcase =~ /\.(jpeg|jpg|png|gif)$/u and action == :to_html
63
+ # allow as default
64
+ # TODO
65
+ # refactor as normal option
66
+ # currently, support fachinfo/patinfo format only
67
+ require 'ydocx/templates/fachinfo'
68
+ options.merge!({:style => :frame})
69
+ else
70
+ self.error "#{self.command}: exit with #{option}: Unknown option"
71
+ end
72
+ end
73
+ if !args.include?('-f') and !args.include?('--format')
74
+ # default fachinfo
75
+ require 'ydocx/templates/fachinfo'
76
+ options.merge!({:style => :frame}) if action == :to_html
77
+ end
78
+ options
79
+ end
33
80
  def report(action, path)
34
81
  puts "#{self.command}: generated #{File.expand_path(path)}"
35
82
  exit
@@ -48,41 +95,8 @@ Usage: #{self.command} file [options]
48
95
  elsif !File.extname(path).match(/^\.docx$/)
49
96
  self.error "#{self.command}: cannot open #{file}: Not a docx file"
50
97
  else
51
- options = {}
52
- if option = argv.shift
53
- if option =~ @@format
54
- case argv[0]
55
- when 'fi', 'fachinfo'
56
- require 'ydocx/templates/fachinfo'
57
- options.merge!({:style => :frame}) if action == :to_html
58
- when 'pi', 'patinfo'
59
- require 'ydocx/templates/patinfo'
60
- options.merge!({:style => :frame}) if action == :to_html
61
- when 'pl', 'plain'
62
- options.merge!({:style => true}) if action == :to_html
63
- when 'none'
64
- # pass
65
- else
66
- self.error "#{self.command}: exit with #{option}: Invalid argument"
67
- end
68
- elsif option =~ @@help
69
- self.help
70
- elsif option.downcase =~ /\.(jpeg|jpg|png|gif)$/u and action == :to_html
71
- # allow as default
72
- # TODO
73
- # refactor as normal option
74
- # currently, support fachinfo/patinfo format only
75
- require 'ydocx/templates/fachinfo'
76
- options.merge!({:style => :frame})
77
- else
78
- self.error "#{self.command}: exit with #{option}: Unknown option"
79
- end
80
- else
81
- # default fachinfo
82
- require 'ydocx/templates/fachinfo'
83
- options.merge!({:style => :frame}) if action == :to_html
84
- end
85
- doc = YDocx::Document.open(path)
98
+ options = self.parse(action, argv)
99
+ doc = YDocx::Document.open(path, options)
86
100
  doc.send(action, path, options)
87
101
  ext = self.extname(action)
88
102
  self.report action, doc.output_file(ext[1..-1])
@@ -2,11 +2,11 @@
2
2
  # encoding: utf-8
3
3
 
4
4
  require 'pathname'
5
- require 'zip/zip'
5
+ require 'zip'
6
6
  begin
7
- require 'RMagick'
7
+ require 'rmagick'
8
8
  rescue LoadError
9
- warn "Couldn't load RMagick: .wmf conversion off"
9
+ warn "Couldn't load rmagick: .wmf conversion off"
10
10
  end
11
11
  require 'ydocx/parser'
12
12
  require 'ydocx/builder'
@@ -83,7 +83,7 @@ module YDocx
83
83
  def create_files
84
84
  files_dir = output_directory
85
85
  mkdir Pathname.new(files_dir) unless files_dir.exist?
86
- @zip = Zip::ZipFile.open(@path.realpath)
86
+ @zip = Zip::File.open(@path.realpath)
87
87
  @images.each do |image|
88
88
  origin_path = Pathname.new image[:origin] # media/filename.ext
89
89
  source_path = Pathname.new image[:source] # images/filename.ext
@@ -118,10 +118,11 @@ module YDocx
118
118
  end
119
119
  def read(file)
120
120
  @path = Pathname.new file
121
- @zip = Zip::ZipFile.open(@path.realpath)
121
+ @zip = Zip::File.open(@path.realpath)
122
122
  doc = @zip.find_entry('word/document.xml').get_input_stream
123
123
  rel = @zip.find_entry('word/_rels/document.xml.rels').get_input_stream
124
124
  @parser = Parser.new(doc, rel) do |parser|
125
+ parser.lang = @options[:lang] if @options[:lang]
125
126
  @contents = parser.parse
126
127
  @indecies = parser.indecies
127
128
  @images = parser.images
@@ -8,7 +8,7 @@ require 'ydocx/markup_method'
8
8
  module YDocx
9
9
  class Parser
10
10
  include MarkupMethod
11
- attr_accessor :indecies, :images, :result, :space
11
+ attr_accessor :indecies, :images, :result, :space, :lang
12
12
  def initialize(doc, rel)
13
13
  @doc = Nokogiri::XML.parse(doc)
14
14
  @rel = Nokogiri::XML.parse(rel)
@@ -55,7 +55,7 @@ module YDocx
55
55
  if symbol
56
56
  _text = ''
57
57
  text.unpack('U*').each do |char|
58
- _text << optional_replace(char.to_s(16))
58
+ _text << character_replace(char.to_s(16))
59
59
  end
60
60
  text = _text
61
61
  end
@@ -76,14 +76,14 @@ module YDocx
76
76
  end
77
77
  text
78
78
  end
79
- def optional_escape(text)
79
+ def character_encode(text)
80
80
  text.force_encoding('utf-8')
81
81
  # NOTE
82
82
  # :named only for escape at Builder
83
83
  text = @coder.encode(text, :named)
84
84
  text
85
85
  end
86
- def optional_replace(code)
86
+ def character_replace(code)
87
87
  code = '0x' + code
88
88
  # NOTE
89
89
  # replace with rsemble html character ref
@@ -143,6 +143,9 @@ module YDocx
143
143
  #p "char : " + @coder.decode("&#%s;" % code.hex.to_s)
144
144
  end
145
145
  end
146
+ def optional_escape(text)
147
+ text
148
+ end
146
149
  def parse_block(node)
147
150
  nil # default no block element
148
151
  end
@@ -155,19 +158,19 @@ module YDocx
155
158
  ns = r.namespaces.merge additional_namespaces
156
159
  [
157
160
  { # old type shape
158
- :attr => 'id',
161
+ :attr => 'r:id',
159
162
  :path => 'w:pict//v:shape//v:imagedata',
160
163
  :wrap => 'w:pict//v:shape//w10:wrap',
161
164
  :type => '',
162
165
  },
163
166
  { # in anchor
164
- :attr => 'embed',
167
+ :attr => 'r:embed',
165
168
  :path => 'w:drawing//wp:anchor//a:graphic//a:graphicData//pic:pic//pic:blipFill//a:blip',
166
169
  :wrap => 'w:drawing//wp:anchor//wp:wrapTight',
167
170
  :type => 'wrapText',
168
171
  },
169
172
  { # stand alone
170
- :attr => 'embed',
173
+ :attr => 'r:embed',
171
174
  :path => 'w:drawing//a:graphic//a:graphicData//pic:pic//pic:blipFill//a:blip',
172
175
  :wrap => 'w:drawing//wp:wrapTight',
173
176
  :type => 'wrapText',
@@ -229,8 +232,8 @@ module YDocx
229
232
  end
230
233
  end
231
234
  unless r.xpath('w:sym').empty?
232
- code = r.xpath('w:sym').first['char'].downcase # w:char
233
- content << optional_replace(code)
235
+ code = r.xpath('w:sym').first['w:char'].downcase # w:char
236
+ content << character_replace(code)
234
237
  pos += 1
235
238
  end
236
239
  if !r.xpath('w:pict').empty? or !r.xpath('w:drawing').empty?
@@ -276,6 +279,7 @@ module YDocx
276
279
  end
277
280
  def parse_text(r, lstrip=false)
278
281
  text = r.xpath('w:t').map(&:text).join('')
282
+ text = character_encode(text)
279
283
  text = optional_escape(text)
280
284
  text = text.lstrip if lstrip
281
285
  if rpr = r.xpath('w:rPr')
@@ -6,45 +6,100 @@ require 'cgi'
6
6
  module YDocx
7
7
  class Parser
8
8
  attr_accessor :code, :lang
9
+ @@figure_pattern = /&lsquo;|&rsquo;|&apos;|&acute;/
9
10
  def init
10
11
  @image_path = 'image'
11
12
  @code = nil
12
- @lang = 'DE'
13
+ @lang ||= 'de'
13
14
  end
14
- private
15
+ ###
16
+ # Fachinfo Chapters
17
+ # 1. name
18
+ # 2. composition
19
+ # 3. galenic form
20
+ # 4. indications
21
+ # 5. usage
22
+ # 6. contra_indications
23
+ # 7. restrictions
24
+ # 8. interactions
25
+ # 9. pregnancy
26
+ # 10. driving_ability
27
+ # 11. unwanted_effects
28
+ # 12. overdose
29
+ # 13. effects
30
+ # 14. kinetic
31
+ # 15. preclinic
32
+ # 16. other_advice
33
+ # 17. iksnr
34
+ # 18. packages
35
+ # 19. registration_owner
36
+ # 20. date
15
37
  def chapters
16
- # TODO
17
- # Franzoesisch
18
38
  chapters = {
19
- 'Name' => /^Name\s+des\s+Pr&auml;parates$/u, # 1
20
- 'Zusammens.' => /^Zusammensetzung($|\s*\/\s*(Wirkstoffe|Hilsstoffe)$)/u, # 2
21
- 'Galen.Form' => /^Galenische\s+Form\s*(und|\/)\s*Wirkstoffmenge\s+pro\s+Einheit$|^Forme\s*gal.nique/iu, # 3
22
- 'Ind./Anw.m&ouml;gl.' => /^Indikationen(\s+|\s*(\/|und)\s*)Anwendungsm&ouml;glichkeiten$|^Indications/u, # 4
23
- 'Dos./Anw.' => /^Dosierung\s*(\/|und)\s*Anwendung/u, # 5
24
- 'Kontraind.' => /^Kontraindikationen($|\s*\(\s*absolute\s+Kontraindikationen\s*\)$)/u, # 6
25
- 'Warn.hinw.' => /^Warnhinweise\s+und\s+Vorsichtsmassnahmen($|\s*\/\s*(relative\s+Kontraindikationen|Warnhinweise\s*und\s*Vorsichtsmassnahmen)$)/u, # 7
26
- 'Interakt.' => /^Interaktionen$|^Interactions/u, # 8
27
- 'Schwangerschaft' => /^Schwangerschaft(,\s*|\s*\/\s*|\s+und\s+)Stillzeit$/u, # 9
28
- 'Fahrt&uuml;cht.' => /^Wirkung\s+auf\s+die\s+Fahrt&uuml;chtigkeit\s+und\s+auf\s+das\s+Bedienen\s+von\s+Maschinen$/u, # 10
29
- 'Unerw.Wirkungen' => /^Unerw&uuml;nschte\s+Wirkungen$/u, # 11
30
- '&Uuml;berdos.' => /^&Uuml;berdosierung$|^Surdosage$/u, # 12
31
- 'Eigensch.' => /^Eigenschaften\s*\/\s*Wirkungen($|\s*\(\s*(ATC\-Code|Wirkungsmechanismus|Pharmakodyamik|Klinische\s+Wirksamkeit)\s*\)\s*$)|^Propri.t.s/iu, # 13
32
- 'Pharm.kinetik' => /^Pharmakokinetik($|\s*\((Absorption,\s*Distribution,\s*Metabolisms,\s*Elimination\s|Kinetik\s+spezieller\s+Patientengruppen)*\)$)|^Pharmacocin.tique?/iu, # 14
33
- 'Pr&auml;klin.' => /^Pr&auml;klinische\s+Daten$/u, # 15
34
- 'Sonstige H.' => /^Sonstige\s*Hinweise($|\s*\(\s*(Inkompatibilit&auml;ten|Beeinflussung\s*diagnostischer\s*Methoden|Haltbarkeit|Besondere\s*Lagerungshinweise|Hinweise\s+f&uuml;r\s+die\s+Handhabung)\s*\)$)|^Remarques/u, # 16
35
- 'Swissmedic-Nr.' => /^Zulassungsnummer(n|:|$|\s*\(\s*Swissmedic\s*\)$)/u, # 17
36
- 'Packungen' => /^Packungen($|\s*\(\s*mit\s+Angabe\s+der\s+Abgabekategorie\s*\)$)/u, # 18
37
- 'Reg.Inhaber' => /^Zulassungsinhaberin($|\s*\(\s*Firma\s+und\s+Sitz\s+gem&auml;ss\s*Handelsregisterauszug\s*\))/u, # 19
38
- 'Stand d. Info.' => /^Stand\s+der\s+Information$|^Mise\s+.\s+jour$/iu, # 20
39
+ :de => {
40
+ 'name' => /^Name\s+des\s+Präparates$/u, # 1
41
+ 'composition' => /^Zusammensetzung|Wirkstoffe|Hilsstoffe/u, # 2
42
+ 'galenic_form' => /^Galenische\s+Form\s*(und|\/)\s*Wirkstoffmenge\s+pro\s+Einheit$/iu, # 3
43
+ 'indications' => /^Indikationen(\s+|\s*(\/|und)\s*)Anwendungsmöglichkeiten$/u, # 4
44
+ 'usage' => /^Dosierung\s*(\/|und)\s*Anwendung/u, # 5
45
+ 'contra_indications' => /^Kontraindikationen($|\s*\(\s*absolute\s+Kontraindikationen\s*\)$)/u, # 6
46
+ 'restrictions' => /^Warnhinweise\s+und\s+Vorsichtsmassnahmen($|\s*\/\s*(relative\s+Kontraindikationen|Warnhinweise\s*und\s*Vorsichtsmassnahmen)$)/u, # 7
47
+ 'interactions' => /^Interaktionen$/u, # 8
48
+ 'pregnancy' => /^Schwangerschaft(,\s*|\s*\/\s*|\s+und\s+)Stillzeit$/u, # 9
49
+ 'driving_ability' => /^Wirkung\s+auf\s+die\s+Fahrtüchtigkeit\s+und\s+auf\s+das\s+Bedienen\s+von\s+Maschinen$/u, # 10
50
+ 'unwanted_effects' => /^Unerwünschte\s+Wirkungen$/u, # 11
51
+ 'overdose' => /^Überdosierung$/u, # 12
52
+ 'effects' => /^Eigenschaften\s*\/\s*Wirkungen($|\s*\(\s*(ATC\-Code|Wirkungsmechanismus|Pharmakodyamik|Klinische\s+Wirksamkeit)\s*\)\s*$)/iu, # 13
53
+ 'kinetic' => /^Pharmakokinetik($|\s*\((Absorption,\s*Distribution,\s*Metabolisms,\s*Elimination\s|Kinetik\s+spezieller\s+Patientengruppen)*\)$)/iu, # 14
54
+ 'preclinic' => /^Präklinische\s+Daten$/u, # 15
55
+ 'other_advice' => /^Sonstige\s*Hinweise($|\s*\(\s*(Inkompatibilitäten|Beeinflussung\s*diagnostischer\s*Methoden|Haltbarkeit|Besondere\s*Lagerungshinweise|Hinweise\s+für\s+die\s+Handhabung)\s*\)$)|^Remarques/u, # 16
56
+ 'iksnrs' => /^Zulassungsnummer(n|:|$|\s*\(\s*Swissmedic\s*\)$)/u, # 17
57
+ 'packages' => /^Packungen($|\s*\(\s*mit\s+Angabe\s+der\s+Abgabekategorie\s*\)$)/u, # 18
58
+ 'registration_owner' => /^Zulassungsinhaberin($|\s*\(\s*Firma\s+und\s+Sitz\s+gemäss\s*Handelsregisterauszug\s*\))/u, # 19
59
+ 'date' => /^Stand\s+der\s+Information$/iu, # 20
60
+ 'fabrication' => /^Herstellerin/u,
61
+ 'company' => /^Vertriebsfirma/u,
62
+ },
63
+ :fr => {
64
+ 'name' => /^Nom$/u, # 1
65
+ 'composition' => /^Composition$/u, # 2
66
+ 'galenic_form' => /^Forme\s+galénique\s+et\s+quantité\s+de\s+principe\s+actif\s+par\s+unité|^Forme\s*gal.nique/iu, # 3
67
+ 'indications' => /^Indications/u, # 4
68
+ 'usage' => /^Posologiei/u, # 5
69
+ 'contra_indications' => /^Contre\-indications/iu, # 6
70
+ 'restrictions' => /^Mises/u, # 7
71
+ 'interactions' => /^Interactions/u, # 8
72
+ 'pregnancy' => /^Grossesse\s*\/\s*Allaitement/u, # 9
73
+ 'driving_ability' => /^Effet\s+sur\s+l'aptitude\s+à;\s+la\s+conduite\s+et\s+l'utilisation\s+de\s+machines/u, # 10
74
+ 'unwanted_effects' => /^Effets/u, # 11
75
+ 'overdose' => /^Surdosage$/u, # 12
76
+ 'effects' => /^Propriétés/iu, # 13
77
+ 'kinetic' => /^Pharmacocinétique$/iu, # 14
78
+ 'preclinic' => /^Données\s+précliniques$/u, # 15
79
+ 'other_advice' => /^Remarques/u, # 16
80
+ 'iksnrs' => /^Numéro\s+d'autorisation$/u, # 17
81
+ 'packages' => /^Présentation/iu, # 18
82
+ 'registration_owner' => /^Titulaire\s+de\s+l'autorisation$/u, # 19
83
+ 'date' => /^Mise à jour/iu, # 20
84
+ 'fabrication' => /^Fabricant$/u,
85
+ 'company' => /^Distributeur/u,
86
+ }
39
87
  }
88
+ if @lang == 'fr' || @lang == :fr
89
+ chapters[:fr]
90
+ else
91
+ chapters[:de]
92
+ end
40
93
  end
94
+ private
41
95
  def escape_id(text)
42
- CGI.escape(text.gsub(/&(.)uml;/, '\1e').gsub(/\s*\/\s*|\s+|\/|\-/, '_').gsub(/\./, '').downcase)
96
+ CGI.escape(text.
97
+ gsub(/&(.)uml;/, '\1e').gsub(/&apos;/, '').gsub(/&(eacute|agrave);/, 'e').
98
+ gsub(/\s*\/\s*|\s+|\/|\-/, '_').gsub(/\./, '').downcase)
43
99
  end
44
- def parse_code(text) # swissmedic nummer
45
- if text =~ /^\s*(\d{2})(&lsquo;|&rsquo;|&apos;|.|\s*)(\d{3})\s*\(\s*Swiss\s*medic\s*\)(\s*|.)$/iu
46
- @code = "%5d" % ($1 + $3)
47
- elsif text =~ /^\s*(\d{5})(.*|\s*)\s*\(\s*Swiss\s*medic\s*\)(\s*|.)$/iu
100
+ def parse_code(text) # swissmedic number
101
+ if text.gsub(@@figure_pattern, '') =~
102
+ /^\s*(\d{5})(.*|\s*)\s*\(\s*Swiss\s*medic\s*\)(\s*|.)$/iu
48
103
  @code = "%5d" % $1
49
104
  else
50
105
  nil
@@ -54,18 +109,19 @@ module YDocx
54
109
  return markup(:h2, text, {:id => id})
55
110
  end
56
111
  def parse_title(node, text)
57
- if @indecies.empty? and !text.empty? and
58
- (node.previous.inner_text.strip.empty? or node.parent.previous.nil?)
112
+ if @indecies.empty? and !text.empty? and node.previous and
113
+ (node.parent.previous.nil? or node.previous.inner_text.strip.empty?)
59
114
  # The first line as package name
60
- @indecies << {:text => 'Titel', :id => 'titel'}
61
- return markup(:h1, text, {:id => 'titel'})
115
+ title = (@lang == 'fr' ? 'Titre' : 'Titel')
116
+ @indecies << {:text => title, :id => title.downcase}
117
+ return markup(:h1, text, {:id => title.downcase})
62
118
  else
63
119
  return nil
64
120
  end
65
121
  end
66
122
  def parse_block(node)
67
123
  text = node.inner_text.strip
68
- text = optional_escape text
124
+ text = character_encode(text)
69
125
  chapters.each_pair do |chapter, regexp|
70
126
  if text =~ regexp
71
127
  # allow without line break
@@ -73,13 +129,12 @@ module YDocx
73
129
  id = escape_id(chapter)
74
130
  @indecies << {:text => chapter, :id => id}
75
131
  return parse_heading(text, id)
76
- elsif parse_code(text)
77
- return nil
78
132
  end
79
133
  end
80
134
  if title = parse_title(node, text)
81
135
  return title
82
136
  end
137
+ parse_code(text)
83
138
  return nil
84
139
  end
85
140
  end
@@ -177,11 +232,13 @@ div#container {
177
232
  end
178
233
  style.gsub(/\s\s+|\n/, ' ')
179
234
  end
180
- def resolve_path(path)
235
+ def resolve_path(path) # image src
181
236
  if reference = @references.shift
182
237
  File.dirname(path) + '/' + reference.basename.to_s
183
- else
238
+ elsif @files.to_s =~ /\d{5}/
184
239
  path
240
+ else
241
+ @files.join path
185
242
  end
186
243
  end
187
244
  end
@@ -202,10 +259,11 @@ div#container {
202
259
  end
203
260
  @files
204
261
  end
205
- def output_file(ext)
262
+ def output_file(ext) # html
263
+ lang = (@parser.lang.downcase == 'fr' ? 'fr' : 'de')
206
264
  if @parser.code
207
265
  filename = @parser.code
208
- output_directory.join "#{filename}.#{ext.to_s}"
266
+ output_directory.join "#{lang}_#{filename}.#{ext.to_s}"
209
267
  else # default
210
268
  @path.sub_ext(".#{ext.to_s}")
211
269
  end
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ module YDocx
4
+ VERSION = '1.2.6'
5
+ end
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+ require 'spec_helper'
4
+
5
+ RSpec.configure do |config|
6
+ config.expect_with(:rspec) { |c| c.syntax = :should }
7
+ end
8
+
9
+ describe "ydocx" do
10
+
11
+ before :all do
12
+ end
13
+
14
+ before :each do
15
+ Dir.glob("#{YDcoxHelper::DataDir}/*.xml").each { |file| FileUtils.rm_f(file, :verbose => $VERBOSE) }
16
+ Dir.glob("#{YDcoxHelper::DataDir}/*.html").each { |file| FileUtils.rm_f(file, :verbose => $VERBOSE) }
17
+ end
18
+
19
+ after :each do
20
+ end
21
+
22
+ after :all do
23
+ Dir.glob("#{YDcoxHelper::DataDir}/*.xml").each { |file| FileUtils.rm_f(file, :verbose => $VERBOSE) }
24
+ Dir.glob("#{YDcoxHelper::DataDir}/*.html").each { |file| FileUtils.rm_f(file, :verbose => $VERBOSE) }
25
+ end
26
+
27
+ it "should convert sinovial_FR to xml" do
28
+ require 'ydocx/templates/fachinfo'
29
+ sinovial_FR = File.join(YDcoxHelper::DataDir, 'Sinovial_FR.docx')
30
+ File.exists?(sinovial_FR).should be true
31
+ doc = YDocx::Document.open(sinovial_FR, { :lang => :fr})
32
+ sinovial_FR_xml = sinovial_FR.sub('.docx', '.xml')
33
+ doc.to_xml(sinovial_FR_xml, {:format => :fachinfo})
34
+ out = doc.output_file('xml')
35
+ File.exists?(sinovial_FR_xml).should be true
36
+ doc.parser.lang.to_s.should == 'fr'
37
+ doc = Nokogiri::XML(open(sinovial_FR_xml))
38
+ doc.xpath('//chapters/chapter[contains(heading, "Fabricant")]').size.should > 0
39
+ doc.xpath('//chapters/chapter[contains(heading, "Distributeur")]').size.should > 0
40
+ doc.xpath('//chapters/chapter[contains(heading, "Remarques particulières")]').size.should > 0
41
+ end
42
+
43
+ it "should convert sinovial_DE to xml" do
44
+ sinovial_DE = File.join(YDcoxHelper::DataDir, 'Sinovial_DE.docx')
45
+ File.exists?(sinovial_DE).should be true
46
+ doc = YDocx::Document.open(sinovial_DE)
47
+ sinovial_DE_xml = sinovial_DE.sub('.docx', '.xml')
48
+ doc.to_xml(sinovial_DE_xml, {:format => :fachinfo})
49
+ out = doc.output_file('xml')
50
+ File.exists?(sinovial_DE_xml).should be true
51
+ doc.parser.lang.to_s.should == 'de'
52
+ doc = Nokogiri::XML(open(sinovial_DE_xml))
53
+ doc.xpath('//chapters/chapter[contains(heading, "Packung")]').size.should > 0
54
+ doc.xpath('//chapters/chapter[contains(heading, "Hersteller")]').size.should > 0
55
+ doc.xpath('//chapters/chapter[contains(heading, "Vertriebsfirma")]').size.should > 0
56
+ end
57
+
58
+ it "should convert sinovial_DE to html" do
59
+ sinovial_DE = File.join(YDcoxHelper::DataDir, 'Sinovial_DE.docx')
60
+ File.exists?(sinovial_DE).should be true
61
+ doc = YDocx::Document.open(sinovial_DE)
62
+ sinovial_DE_html = sinovial_DE.sub('.docx', '.html')
63
+ doc.to_html(sinovial_DE_html, {:format => :fachinfo})
64
+ out = doc.output_file('html')
65
+ File.exists?(sinovial_DE_html).should be true
66
+ end
67
+
68
+ it "should convert various pseudo fachinfo to xml" do
69
+ require 'ydocx/templates/fachinfo'
70
+ files = [ 'Sinovial_0.8_DE.docx', 'Sinovial_0.8_FR.docx',
71
+ 'Sinovial_DE.docx', 'Sinovial_FR.docx',
72
+ ]
73
+ files.each {
74
+ |file|
75
+ file_name = File.join(YDcoxHelper::DataDir, file)
76
+ File.exists?(file_name).should be true
77
+ file.match('_DE') ? lang = 'de' : lang = 'fr'
78
+ doc = YDocx::Document.open(file_name, { :lang => lang} )
79
+ file_name_xml = file_name.sub('.docx', '.xml')
80
+ doc.to_xml(file_name_xml, {:format => :fachinfo})
81
+ out = doc.output_file('xml')
82
+ File.exists?(file_name_xml).should be true
83
+ doc.parser.lang.should == lang
84
+ doc = Nokogiri::XML(open(file_name_xml))
85
+ }
86
+ end
87
+ end
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'simplecov'
5
+ if ENV['COVERAGE']
6
+ SimpleCov.start
7
+ end
8
+
9
+ require 'fileutils'
10
+ require 'pp'
11
+ require 'ydocx'
12
+
13
+ module YDcoxHelper
14
+ DataDir = File.join(File.dirname(__FILE__), '..', 'spec', 'data')
15
+ end
16
+ require 'rspec'
17
+ require 'ydocx'
metadata CHANGED
@@ -1,88 +1,64 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ydocx
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.1
5
- prerelease:
4
+ version: 1.2.6
6
5
  platform: ruby
7
6
  authors:
8
- - Yasuhiro Asaka, Zeno R.R. Davatz
7
+ - "'Yasuhiro Asak, Zeno R.R. Davatz"
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-07-11 00:00:00.000000000 Z
13
- dependencies:
14
- - !ruby/object:Gem::Dependency
15
- name: rdoc
16
- requirement: &6667440 !ruby/object:Gem::Requirement
17
- none: false
18
- requirements:
19
- - - ~>
20
- - !ruby/object:Gem::Version
21
- version: '3.10'
22
- type: :development
23
- prerelease: false
24
- version_requirements: *6667440
25
- - !ruby/object:Gem::Dependency
26
- name: hoe
27
- requirement: &6667000 !ruby/object:Gem::Requirement
28
- none: false
29
- requirements:
30
- - - ~>
31
- - !ruby/object:Gem::Version
32
- version: '2.13'
33
- type: :development
34
- prerelease: false
35
- version_requirements: *6667000
11
+ date: 2021-01-20 00:00:00.000000000 Z
12
+ dependencies: []
36
13
  description: ''
37
- email:
38
- - yasaka@ywesee.com, zdavatz@ywesee.com
14
+ email: yasaka@ywesee.com, zdavatz@ywesee.com
39
15
  executables:
40
- - docx2html
41
16
  - docx2xml
17
+ - docx2html
42
18
  extensions: []
43
- extra_rdoc_files:
44
- - History.txt
45
- - Manifest.txt
46
- - README.txt
19
+ extra_rdoc_files: []
47
20
  files:
48
21
  - History.txt
49
22
  - Manifest.txt
50
23
  - README.txt
51
- - Rakefile
52
24
  - bin/docx2html
53
25
  - bin/docx2xml
54
26
  - lib/ydocx.rb
55
27
  - lib/ydocx/builder.rb
28
+ - lib/ydocx/command.rb
56
29
  - lib/ydocx/document.rb
57
30
  - lib/ydocx/markup_method.rb
58
31
  - lib/ydocx/parser.rb
59
- - lib/ydocx/command.rb
60
32
  - lib/ydocx/templates/fachinfo.rb
61
33
  - lib/ydocx/templates/patinfo.rb
62
- homepage: https://github.com/zdavatz/ydocx
63
- licenses: []
34
+ - lib/ydocx/version.rb
35
+ - spec/data/Sinovial_0.8_DE.docx
36
+ - spec/data/Sinovial_0.8_FR.docx
37
+ - spec/data/Sinovial_DE.docx
38
+ - spec/data/Sinovial_FR.docx
39
+ - spec/lib/sinovial_spec.rb
40
+ - spec/spec_helper.rb
41
+ homepage: https://github.com/zdavatz/ydocx/
42
+ licenses:
43
+ - GPL-3.0
44
+ metadata: {}
64
45
  post_install_message:
65
- rdoc_options:
66
- - --main
67
- - README.txt
46
+ rdoc_options: []
68
47
  require_paths:
69
48
  - lib
70
49
  required_ruby_version: !ruby/object:Gem::Requirement
71
- none: false
72
50
  requirements:
73
- - - ! '>='
51
+ - - ">="
74
52
  - !ruby/object:Gem::Version
75
53
  version: '0'
76
54
  required_rubygems_version: !ruby/object:Gem::Requirement
77
- none: false
78
55
  requirements:
79
- - - ! '>='
56
+ - - ">="
80
57
  - !ruby/object:Gem::Version
81
58
  version: '0'
82
59
  requirements: []
83
- rubyforge_project: ydocx
84
- rubygems_version: 1.8.15
60
+ rubygems_version: 3.1.2
85
61
  signing_key:
86
- specification_version: 3
87
- summary: ''
62
+ specification_version: 4
63
+ summary: Parsing docx files with Ruby and output them as HTML and XML
88
64
  test_files: []
data/Rakefile DELETED
@@ -1,17 +0,0 @@
1
- # -*- ruby -*-
2
-
3
- require 'rubygems'
4
- require 'hoe'
5
-
6
- # Hoe.plugin :compiler
7
- # Hoe.plugin :gem_prelude_sucks
8
- # Hoe.plugin :inline
9
- # Hoe.plugin :minitest
10
- # Hoe.plugin :racc
11
- # Hoe.plugin :rubyforge
12
-
13
- Hoe.spec 'ydocx' do
14
-
15
- developer('Yasuhiro Asaka, Zeno R.R. Davatz', 'yasaka@ywesee.com, zdavatz@ywesee.com')
16
-
17
- end