ydocx 1.2.1 → 1.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: d110c0230d26d291bbc973e022379d669a7fa963dc0baf27fa3b0ccaaa193bab
4
+ data.tar.gz: f21bfa758230376fadeab73e75645dd06862957c0d8c7d120bd489d7ffeb9b1b
5
+ SHA512:
6
+ metadata.gz: 75d8e7bed0a19192c43c4b75be52b9c5f9da1ef265052420d299b810189faf3b880cc3e5f207621695a6cd8d156c477aed00c87b6e82366c7d8dedc012411e90
7
+ data.tar.gz: 84bb26bab8bc03ea116f90f1df1708d4f646f0b0094a6e685723f2e7448e409a5db8c46845c54d44ee723fd9dd0cf866946d49e8b15f6a0a56d63c01610925cc
@@ -1,3 +1,26 @@
1
+ === 1.2.6 / 19.01.2021
2
+
3
+ * Updated for use with Ruby 3.0.0. RMagick->rmagick
4
+ * Replaced hoe by bundler/gem_tasks
5
+
6
+ === 1.2.5 / 07.07.2014
7
+
8
+ * Updated to use rubyzip >= 1.0.0
9
+
10
+ === 1.2.4 / 26.02.2014
11
+
12
+ * Many changes by Niklaus Giger to get ready for Pseudo-Fachinformation.
13
+
14
+ === 1.2.3 / 12.07.2012
15
+
16
+ * Updated french char handling, lang in filename
17
+
18
+ === 1.2.2 / 12.07.2012
19
+
20
+ * Fixed typo in french
21
+ * Updated help output
22
+ * Updated help message about --lang option
23
+
1
24
  === 1.2.1 / 11.07.2012
2
25
 
3
26
  * Updated template, option handling for extension
data/README.txt CHANGED
@@ -1,4 +1,4 @@
1
- == ydocx - © ywesee GmbH
1
+ == ydocx by ywesee GmbH
2
2
 
3
3
  * https://github.com/zdavatz/ydocx
4
4
  * Parsing docx files with Ruby and output them as HTML and XML.
@@ -18,17 +18,29 @@
18
18
 
19
19
  == Usage
20
20
 
21
- * Usage: bin/docx2html file [options]
21
+ * Usage: docx2html file [options]
22
22
  -f, --format Format of style and chapter {(fi|fachinfo)|(pi|patinfo)|(pl|plain)|none}, default fachinfo.
23
23
  -h, --help Display this help message.
24
+ -l, --lang Language option for templates {de|fr}
24
25
  -v, --version Show version.
25
26
 
26
27
  == Using the great libraries
27
28
 
28
- * rubyzip
29
+ * rubyzip (>= 1.0.0)
29
30
  * nokogiri
30
31
  * htmlentities
31
- * RMagick
32
+ * rmagick
33
+
34
+ Install them using bundle install.
35
+
36
+ Requires Ruby >= 1.9.2.
37
+
38
+ == TODO
39
+
40
+ For an unknown reason, the gem fails to properly parse the Présentation chapter for sinovial_fr.
41
+ Failing spec is: ydocx should convert sinovial_FR to xml
42
+ Failure/Error: doc.xpath('//chapters/chapter[contains(heading, "Présentation")]').size.should > 0
43
+
32
44
 
33
45
  == License GPLv3.0
34
46
 
@@ -1,8 +1,4 @@
1
1
  #!/usr/bin/env ruby
2
- # encoding: utf-8
3
2
 
4
3
  require 'ydocx/document'
5
4
 
6
- module YDocx
7
- VERSION = '1.2.1'
8
- end
@@ -6,8 +6,9 @@ require 'ydocx'
6
6
  module YDocx
7
7
  class Command
8
8
  class << self
9
- @@help = /^\-(h|\-help)$/u
10
9
  @@format = /^\-(f|\-format)$/u
10
+ @@help = /^\-(h|\-help)$/u
11
+ @@lang = /^\-(l|\-lang)$/u
11
12
  @@version = /^\-(v|\-version)$/u
12
13
  def error(message='')
13
14
  puts message
@@ -25,11 +26,57 @@ module YDocx
25
26
  Usage: #{self.command} file [options]
26
27
  -f, --format Format of style and chapter {(fi|fachinfo)|(pi|patinfo)|(pl|plain)|none}, default fachinfo.
27
28
  -h, --help Display this help message.
29
+ -l, --lang Language option for templates {de|fr}
28
30
  -v, --version Show version.
29
31
  BANNER
30
32
  puts banner
31
33
  exit
32
34
  end
35
+ def parse(action, argv)
36
+ if argv.length.odd?
37
+ self.error "#{self.command}: exit with: Invalid option"
38
+ else
39
+ args = Hash[*argv]
40
+ end
41
+ options = {}
42
+ args.each_pair do |option, value|
43
+ if option =~ @@format
44
+ case value
45
+ when 'fi', 'fachinfo'
46
+ require 'ydocx/templates/fachinfo'
47
+ options.merge!({:style => :frame}) if action == :to_html
48
+ when 'pi', 'patinfo'
49
+ require 'ydocx/templates/patinfo'
50
+ options.merge!({:style => :frame}) if action == :to_html
51
+ when 'pl', 'plain'
52
+ options.merge!({:style => true}) if action == :to_html
53
+ when 'none'
54
+ # pass
55
+ else
56
+ self.error "#{self.command}: exit with #{option}: Invalid argument"
57
+ end
58
+ elsif option =~ @@help
59
+ self.help
60
+ elsif option =~ @@lang
61
+ options.merge!({:lang => value})
62
+ elsif option.downcase =~ /\.(jpeg|jpg|png|gif)$/u and action == :to_html
63
+ # allow as default
64
+ # TODO
65
+ # refactor as normal option
66
+ # currently, support fachinfo/patinfo format only
67
+ require 'ydocx/templates/fachinfo'
68
+ options.merge!({:style => :frame})
69
+ else
70
+ self.error "#{self.command}: exit with #{option}: Unknown option"
71
+ end
72
+ end
73
+ if !args.include?('-f') and !args.include?('--format')
74
+ # default fachinfo
75
+ require 'ydocx/templates/fachinfo'
76
+ options.merge!({:style => :frame}) if action == :to_html
77
+ end
78
+ options
79
+ end
33
80
  def report(action, path)
34
81
  puts "#{self.command}: generated #{File.expand_path(path)}"
35
82
  exit
@@ -48,41 +95,8 @@ Usage: #{self.command} file [options]
48
95
  elsif !File.extname(path).match(/^\.docx$/)
49
96
  self.error "#{self.command}: cannot open #{file}: Not a docx file"
50
97
  else
51
- options = {}
52
- if option = argv.shift
53
- if option =~ @@format
54
- case argv[0]
55
- when 'fi', 'fachinfo'
56
- require 'ydocx/templates/fachinfo'
57
- options.merge!({:style => :frame}) if action == :to_html
58
- when 'pi', 'patinfo'
59
- require 'ydocx/templates/patinfo'
60
- options.merge!({:style => :frame}) if action == :to_html
61
- when 'pl', 'plain'
62
- options.merge!({:style => true}) if action == :to_html
63
- when 'none'
64
- # pass
65
- else
66
- self.error "#{self.command}: exit with #{option}: Invalid argument"
67
- end
68
- elsif option =~ @@help
69
- self.help
70
- elsif option.downcase =~ /\.(jpeg|jpg|png|gif)$/u and action == :to_html
71
- # allow as default
72
- # TODO
73
- # refactor as normal option
74
- # currently, support fachinfo/patinfo format only
75
- require 'ydocx/templates/fachinfo'
76
- options.merge!({:style => :frame})
77
- else
78
- self.error "#{self.command}: exit with #{option}: Unknown option"
79
- end
80
- else
81
- # default fachinfo
82
- require 'ydocx/templates/fachinfo'
83
- options.merge!({:style => :frame}) if action == :to_html
84
- end
85
- doc = YDocx::Document.open(path)
98
+ options = self.parse(action, argv)
99
+ doc = YDocx::Document.open(path, options)
86
100
  doc.send(action, path, options)
87
101
  ext = self.extname(action)
88
102
  self.report action, doc.output_file(ext[1..-1])
@@ -2,11 +2,11 @@
2
2
  # encoding: utf-8
3
3
 
4
4
  require 'pathname'
5
- require 'zip/zip'
5
+ require 'zip'
6
6
  begin
7
- require 'RMagick'
7
+ require 'rmagick'
8
8
  rescue LoadError
9
- warn "Couldn't load RMagick: .wmf conversion off"
9
+ warn "Couldn't load rmagick: .wmf conversion off"
10
10
  end
11
11
  require 'ydocx/parser'
12
12
  require 'ydocx/builder'
@@ -83,7 +83,7 @@ module YDocx
83
83
  def create_files
84
84
  files_dir = output_directory
85
85
  mkdir Pathname.new(files_dir) unless files_dir.exist?
86
- @zip = Zip::ZipFile.open(@path.realpath)
86
+ @zip = Zip::File.open(@path.realpath)
87
87
  @images.each do |image|
88
88
  origin_path = Pathname.new image[:origin] # media/filename.ext
89
89
  source_path = Pathname.new image[:source] # images/filename.ext
@@ -118,10 +118,11 @@ module YDocx
118
118
  end
119
119
  def read(file)
120
120
  @path = Pathname.new file
121
- @zip = Zip::ZipFile.open(@path.realpath)
121
+ @zip = Zip::File.open(@path.realpath)
122
122
  doc = @zip.find_entry('word/document.xml').get_input_stream
123
123
  rel = @zip.find_entry('word/_rels/document.xml.rels').get_input_stream
124
124
  @parser = Parser.new(doc, rel) do |parser|
125
+ parser.lang = @options[:lang] if @options[:lang]
125
126
  @contents = parser.parse
126
127
  @indecies = parser.indecies
127
128
  @images = parser.images
@@ -8,7 +8,7 @@ require 'ydocx/markup_method'
8
8
  module YDocx
9
9
  class Parser
10
10
  include MarkupMethod
11
- attr_accessor :indecies, :images, :result, :space
11
+ attr_accessor :indecies, :images, :result, :space, :lang
12
12
  def initialize(doc, rel)
13
13
  @doc = Nokogiri::XML.parse(doc)
14
14
  @rel = Nokogiri::XML.parse(rel)
@@ -55,7 +55,7 @@ module YDocx
55
55
  if symbol
56
56
  _text = ''
57
57
  text.unpack('U*').each do |char|
58
- _text << optional_replace(char.to_s(16))
58
+ _text << character_replace(char.to_s(16))
59
59
  end
60
60
  text = _text
61
61
  end
@@ -76,14 +76,14 @@ module YDocx
76
76
  end
77
77
  text
78
78
  end
79
- def optional_escape(text)
79
+ def character_encode(text)
80
80
  text.force_encoding('utf-8')
81
81
  # NOTE
82
82
  # :named only for escape at Builder
83
83
  text = @coder.encode(text, :named)
84
84
  text
85
85
  end
86
- def optional_replace(code)
86
+ def character_replace(code)
87
87
  code = '0x' + code
88
88
  # NOTE
89
89
  # replace with rsemble html character ref
@@ -143,6 +143,9 @@ module YDocx
143
143
  #p "char : " + @coder.decode("&#%s;" % code.hex.to_s)
144
144
  end
145
145
  end
146
+ def optional_escape(text)
147
+ text
148
+ end
146
149
  def parse_block(node)
147
150
  nil # default no block element
148
151
  end
@@ -155,19 +158,19 @@ module YDocx
155
158
  ns = r.namespaces.merge additional_namespaces
156
159
  [
157
160
  { # old type shape
158
- :attr => 'id',
161
+ :attr => 'r:id',
159
162
  :path => 'w:pict//v:shape//v:imagedata',
160
163
  :wrap => 'w:pict//v:shape//w10:wrap',
161
164
  :type => '',
162
165
  },
163
166
  { # in anchor
164
- :attr => 'embed',
167
+ :attr => 'r:embed',
165
168
  :path => 'w:drawing//wp:anchor//a:graphic//a:graphicData//pic:pic//pic:blipFill//a:blip',
166
169
  :wrap => 'w:drawing//wp:anchor//wp:wrapTight',
167
170
  :type => 'wrapText',
168
171
  },
169
172
  { # stand alone
170
- :attr => 'embed',
173
+ :attr => 'r:embed',
171
174
  :path => 'w:drawing//a:graphic//a:graphicData//pic:pic//pic:blipFill//a:blip',
172
175
  :wrap => 'w:drawing//wp:wrapTight',
173
176
  :type => 'wrapText',
@@ -229,8 +232,8 @@ module YDocx
229
232
  end
230
233
  end
231
234
  unless r.xpath('w:sym').empty?
232
- code = r.xpath('w:sym').first['char'].downcase # w:char
233
- content << optional_replace(code)
235
+ code = r.xpath('w:sym').first['w:char'].downcase # w:char
236
+ content << character_replace(code)
234
237
  pos += 1
235
238
  end
236
239
  if !r.xpath('w:pict').empty? or !r.xpath('w:drawing').empty?
@@ -276,6 +279,7 @@ module YDocx
276
279
  end
277
280
  def parse_text(r, lstrip=false)
278
281
  text = r.xpath('w:t').map(&:text).join('')
282
+ text = character_encode(text)
279
283
  text = optional_escape(text)
280
284
  text = text.lstrip if lstrip
281
285
  if rpr = r.xpath('w:rPr')
@@ -6,45 +6,100 @@ require 'cgi'
6
6
  module YDocx
7
7
  class Parser
8
8
  attr_accessor :code, :lang
9
+ @@figure_pattern = /&lsquo;|&rsquo;|&apos;|&acute;/
9
10
  def init
10
11
  @image_path = 'image'
11
12
  @code = nil
12
- @lang = 'DE'
13
+ @lang ||= 'de'
13
14
  end
14
- private
15
+ ###
16
+ # Fachinfo Chapters
17
+ # 1. name
18
+ # 2. composition
19
+ # 3. galenic form
20
+ # 4. indications
21
+ # 5. usage
22
+ # 6. contra_indications
23
+ # 7. restrictions
24
+ # 8. interactions
25
+ # 9. pregnancy
26
+ # 10. driving_ability
27
+ # 11. unwanted_effects
28
+ # 12. overdose
29
+ # 13. effects
30
+ # 14. kinetic
31
+ # 15. preclinic
32
+ # 16. other_advice
33
+ # 17. iksnr
34
+ # 18. packages
35
+ # 19. registration_owner
36
+ # 20. date
15
37
  def chapters
16
- # TODO
17
- # Franzoesisch
18
38
  chapters = {
19
- 'Name' => /^Name\s+des\s+Pr&auml;parates$/u, # 1
20
- 'Zusammens.' => /^Zusammensetzung($|\s*\/\s*(Wirkstoffe|Hilsstoffe)$)/u, # 2
21
- 'Galen.Form' => /^Galenische\s+Form\s*(und|\/)\s*Wirkstoffmenge\s+pro\s+Einheit$|^Forme\s*gal.nique/iu, # 3
22
- 'Ind./Anw.m&ouml;gl.' => /^Indikationen(\s+|\s*(\/|und)\s*)Anwendungsm&ouml;glichkeiten$|^Indications/u, # 4
23
- 'Dos./Anw.' => /^Dosierung\s*(\/|und)\s*Anwendung/u, # 5
24
- 'Kontraind.' => /^Kontraindikationen($|\s*\(\s*absolute\s+Kontraindikationen\s*\)$)/u, # 6
25
- 'Warn.hinw.' => /^Warnhinweise\s+und\s+Vorsichtsmassnahmen($|\s*\/\s*(relative\s+Kontraindikationen|Warnhinweise\s*und\s*Vorsichtsmassnahmen)$)/u, # 7
26
- 'Interakt.' => /^Interaktionen$|^Interactions/u, # 8
27
- 'Schwangerschaft' => /^Schwangerschaft(,\s*|\s*\/\s*|\s+und\s+)Stillzeit$/u, # 9
28
- 'Fahrt&uuml;cht.' => /^Wirkung\s+auf\s+die\s+Fahrt&uuml;chtigkeit\s+und\s+auf\s+das\s+Bedienen\s+von\s+Maschinen$/u, # 10
29
- 'Unerw.Wirkungen' => /^Unerw&uuml;nschte\s+Wirkungen$/u, # 11
30
- '&Uuml;berdos.' => /^&Uuml;berdosierung$|^Surdosage$/u, # 12
31
- 'Eigensch.' => /^Eigenschaften\s*\/\s*Wirkungen($|\s*\(\s*(ATC\-Code|Wirkungsmechanismus|Pharmakodyamik|Klinische\s+Wirksamkeit)\s*\)\s*$)|^Propri.t.s/iu, # 13
32
- 'Pharm.kinetik' => /^Pharmakokinetik($|\s*\((Absorption,\s*Distribution,\s*Metabolisms,\s*Elimination\s|Kinetik\s+spezieller\s+Patientengruppen)*\)$)|^Pharmacocin.tique?/iu, # 14
33
- 'Pr&auml;klin.' => /^Pr&auml;klinische\s+Daten$/u, # 15
34
- 'Sonstige H.' => /^Sonstige\s*Hinweise($|\s*\(\s*(Inkompatibilit&auml;ten|Beeinflussung\s*diagnostischer\s*Methoden|Haltbarkeit|Besondere\s*Lagerungshinweise|Hinweise\s+f&uuml;r\s+die\s+Handhabung)\s*\)$)|^Remarques/u, # 16
35
- 'Swissmedic-Nr.' => /^Zulassungsnummer(n|:|$|\s*\(\s*Swissmedic\s*\)$)/u, # 17
36
- 'Packungen' => /^Packungen($|\s*\(\s*mit\s+Angabe\s+der\s+Abgabekategorie\s*\)$)/u, # 18
37
- 'Reg.Inhaber' => /^Zulassungsinhaberin($|\s*\(\s*Firma\s+und\s+Sitz\s+gem&auml;ss\s*Handelsregisterauszug\s*\))/u, # 19
38
- 'Stand d. Info.' => /^Stand\s+der\s+Information$|^Mise\s+.\s+jour$/iu, # 20
39
+ :de => {
40
+ 'name' => /^Name\s+des\s+Präparates$/u, # 1
41
+ 'composition' => /^Zusammensetzung|Wirkstoffe|Hilsstoffe/u, # 2
42
+ 'galenic_form' => /^Galenische\s+Form\s*(und|\/)\s*Wirkstoffmenge\s+pro\s+Einheit$/iu, # 3
43
+ 'indications' => /^Indikationen(\s+|\s*(\/|und)\s*)Anwendungsmöglichkeiten$/u, # 4
44
+ 'usage' => /^Dosierung\s*(\/|und)\s*Anwendung/u, # 5
45
+ 'contra_indications' => /^Kontraindikationen($|\s*\(\s*absolute\s+Kontraindikationen\s*\)$)/u, # 6
46
+ 'restrictions' => /^Warnhinweise\s+und\s+Vorsichtsmassnahmen($|\s*\/\s*(relative\s+Kontraindikationen|Warnhinweise\s*und\s*Vorsichtsmassnahmen)$)/u, # 7
47
+ 'interactions' => /^Interaktionen$/u, # 8
48
+ 'pregnancy' => /^Schwangerschaft(,\s*|\s*\/\s*|\s+und\s+)Stillzeit$/u, # 9
49
+ 'driving_ability' => /^Wirkung\s+auf\s+die\s+Fahrtüchtigkeit\s+und\s+auf\s+das\s+Bedienen\s+von\s+Maschinen$/u, # 10
50
+ 'unwanted_effects' => /^Unerwünschte\s+Wirkungen$/u, # 11
51
+ 'overdose' => /^Überdosierung$/u, # 12
52
+ 'effects' => /^Eigenschaften\s*\/\s*Wirkungen($|\s*\(\s*(ATC\-Code|Wirkungsmechanismus|Pharmakodyamik|Klinische\s+Wirksamkeit)\s*\)\s*$)/iu, # 13
53
+ 'kinetic' => /^Pharmakokinetik($|\s*\((Absorption,\s*Distribution,\s*Metabolisms,\s*Elimination\s|Kinetik\s+spezieller\s+Patientengruppen)*\)$)/iu, # 14
54
+ 'preclinic' => /^Präklinische\s+Daten$/u, # 15
55
+ 'other_advice' => /^Sonstige\s*Hinweise($|\s*\(\s*(Inkompatibilitäten|Beeinflussung\s*diagnostischer\s*Methoden|Haltbarkeit|Besondere\s*Lagerungshinweise|Hinweise\s+für\s+die\s+Handhabung)\s*\)$)|^Remarques/u, # 16
56
+ 'iksnrs' => /^Zulassungsnummer(n|:|$|\s*\(\s*Swissmedic\s*\)$)/u, # 17
57
+ 'packages' => /^Packungen($|\s*\(\s*mit\s+Angabe\s+der\s+Abgabekategorie\s*\)$)/u, # 18
58
+ 'registration_owner' => /^Zulassungsinhaberin($|\s*\(\s*Firma\s+und\s+Sitz\s+gemäss\s*Handelsregisterauszug\s*\))/u, # 19
59
+ 'date' => /^Stand\s+der\s+Information$/iu, # 20
60
+ 'fabrication' => /^Herstellerin/u,
61
+ 'company' => /^Vertriebsfirma/u,
62
+ },
63
+ :fr => {
64
+ 'name' => /^Nom$/u, # 1
65
+ 'composition' => /^Composition$/u, # 2
66
+ 'galenic_form' => /^Forme\s+galénique\s+et\s+quantité\s+de\s+principe\s+actif\s+par\s+unité|^Forme\s*gal.nique/iu, # 3
67
+ 'indications' => /^Indications/u, # 4
68
+ 'usage' => /^Posologiei/u, # 5
69
+ 'contra_indications' => /^Contre\-indications/iu, # 6
70
+ 'restrictions' => /^Mises/u, # 7
71
+ 'interactions' => /^Interactions/u, # 8
72
+ 'pregnancy' => /^Grossesse\s*\/\s*Allaitement/u, # 9
73
+ 'driving_ability' => /^Effet\s+sur\s+l'aptitude\s+à;\s+la\s+conduite\s+et\s+l'utilisation\s+de\s+machines/u, # 10
74
+ 'unwanted_effects' => /^Effets/u, # 11
75
+ 'overdose' => /^Surdosage$/u, # 12
76
+ 'effects' => /^Propriétés/iu, # 13
77
+ 'kinetic' => /^Pharmacocinétique$/iu, # 14
78
+ 'preclinic' => /^Données\s+précliniques$/u, # 15
79
+ 'other_advice' => /^Remarques/u, # 16
80
+ 'iksnrs' => /^Numéro\s+d'autorisation$/u, # 17
81
+ 'packages' => /^Présentation/iu, # 18
82
+ 'registration_owner' => /^Titulaire\s+de\s+l'autorisation$/u, # 19
83
+ 'date' => /^Mise à jour/iu, # 20
84
+ 'fabrication' => /^Fabricant$/u,
85
+ 'company' => /^Distributeur/u,
86
+ }
39
87
  }
88
+ if @lang == 'fr' || @lang == :fr
89
+ chapters[:fr]
90
+ else
91
+ chapters[:de]
92
+ end
40
93
  end
94
+ private
41
95
  def escape_id(text)
42
- CGI.escape(text.gsub(/&(.)uml;/, '\1e').gsub(/\s*\/\s*|\s+|\/|\-/, '_').gsub(/\./, '').downcase)
96
+ CGI.escape(text.
97
+ gsub(/&(.)uml;/, '\1e').gsub(/&apos;/, '').gsub(/&(eacute|agrave);/, 'e').
98
+ gsub(/\s*\/\s*|\s+|\/|\-/, '_').gsub(/\./, '').downcase)
43
99
  end
44
- def parse_code(text) # swissmedic nummer
45
- if text =~ /^\s*(\d{2})(&lsquo;|&rsquo;|&apos;|.|\s*)(\d{3})\s*\(\s*Swiss\s*medic\s*\)(\s*|.)$/iu
46
- @code = "%5d" % ($1 + $3)
47
- elsif text =~ /^\s*(\d{5})(.*|\s*)\s*\(\s*Swiss\s*medic\s*\)(\s*|.)$/iu
100
+ def parse_code(text) # swissmedic number
101
+ if text.gsub(@@figure_pattern, '') =~
102
+ /^\s*(\d{5})(.*|\s*)\s*\(\s*Swiss\s*medic\s*\)(\s*|.)$/iu
48
103
  @code = "%5d" % $1
49
104
  else
50
105
  nil
@@ -54,18 +109,19 @@ module YDocx
54
109
  return markup(:h2, text, {:id => id})
55
110
  end
56
111
  def parse_title(node, text)
57
- if @indecies.empty? and !text.empty? and
58
- (node.previous.inner_text.strip.empty? or node.parent.previous.nil?)
112
+ if @indecies.empty? and !text.empty? and node.previous and
113
+ (node.parent.previous.nil? or node.previous.inner_text.strip.empty?)
59
114
  # The first line as package name
60
- @indecies << {:text => 'Titel', :id => 'titel'}
61
- return markup(:h1, text, {:id => 'titel'})
115
+ title = (@lang == 'fr' ? 'Titre' : 'Titel')
116
+ @indecies << {:text => title, :id => title.downcase}
117
+ return markup(:h1, text, {:id => title.downcase})
62
118
  else
63
119
  return nil
64
120
  end
65
121
  end
66
122
  def parse_block(node)
67
123
  text = node.inner_text.strip
68
- text = optional_escape text
124
+ text = character_encode(text)
69
125
  chapters.each_pair do |chapter, regexp|
70
126
  if text =~ regexp
71
127
  # allow without line break
@@ -73,13 +129,12 @@ module YDocx
73
129
  id = escape_id(chapter)
74
130
  @indecies << {:text => chapter, :id => id}
75
131
  return parse_heading(text, id)
76
- elsif parse_code(text)
77
- return nil
78
132
  end
79
133
  end
80
134
  if title = parse_title(node, text)
81
135
  return title
82
136
  end
137
+ parse_code(text)
83
138
  return nil
84
139
  end
85
140
  end
@@ -177,11 +232,13 @@ div#container {
177
232
  end
178
233
  style.gsub(/\s\s+|\n/, ' ')
179
234
  end
180
- def resolve_path(path)
235
+ def resolve_path(path) # image src
181
236
  if reference = @references.shift
182
237
  File.dirname(path) + '/' + reference.basename.to_s
183
- else
238
+ elsif @files.to_s =~ /\d{5}/
184
239
  path
240
+ else
241
+ @files.join path
185
242
  end
186
243
  end
187
244
  end
@@ -202,10 +259,11 @@ div#container {
202
259
  end
203
260
  @files
204
261
  end
205
- def output_file(ext)
262
+ def output_file(ext) # html
263
+ lang = (@parser.lang.downcase == 'fr' ? 'fr' : 'de')
206
264
  if @parser.code
207
265
  filename = @parser.code
208
- output_directory.join "#{filename}.#{ext.to_s}"
266
+ output_directory.join "#{lang}_#{filename}.#{ext.to_s}"
209
267
  else # default
210
268
  @path.sub_ext(".#{ext.to_s}")
211
269
  end
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ module YDocx
4
+ VERSION = '1.2.6'
5
+ end
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+ require 'spec_helper'
4
+
5
+ RSpec.configure do |config|
6
+ config.expect_with(:rspec) { |c| c.syntax = :should }
7
+ end
8
+
9
+ describe "ydocx" do
10
+
11
+ before :all do
12
+ end
13
+
14
+ before :each do
15
+ Dir.glob("#{YDcoxHelper::DataDir}/*.xml").each { |file| FileUtils.rm_f(file, :verbose => $VERBOSE) }
16
+ Dir.glob("#{YDcoxHelper::DataDir}/*.html").each { |file| FileUtils.rm_f(file, :verbose => $VERBOSE) }
17
+ end
18
+
19
+ after :each do
20
+ end
21
+
22
+ after :all do
23
+ Dir.glob("#{YDcoxHelper::DataDir}/*.xml").each { |file| FileUtils.rm_f(file, :verbose => $VERBOSE) }
24
+ Dir.glob("#{YDcoxHelper::DataDir}/*.html").each { |file| FileUtils.rm_f(file, :verbose => $VERBOSE) }
25
+ end
26
+
27
+ it "should convert sinovial_FR to xml" do
28
+ require 'ydocx/templates/fachinfo'
29
+ sinovial_FR = File.join(YDcoxHelper::DataDir, 'Sinovial_FR.docx')
30
+ File.exists?(sinovial_FR).should be true
31
+ doc = YDocx::Document.open(sinovial_FR, { :lang => :fr})
32
+ sinovial_FR_xml = sinovial_FR.sub('.docx', '.xml')
33
+ doc.to_xml(sinovial_FR_xml, {:format => :fachinfo})
34
+ out = doc.output_file('xml')
35
+ File.exists?(sinovial_FR_xml).should be true
36
+ doc.parser.lang.to_s.should == 'fr'
37
+ doc = Nokogiri::XML(open(sinovial_FR_xml))
38
+ doc.xpath('//chapters/chapter[contains(heading, "Fabricant")]').size.should > 0
39
+ doc.xpath('//chapters/chapter[contains(heading, "Distributeur")]').size.should > 0
40
+ doc.xpath('//chapters/chapter[contains(heading, "Remarques particulières")]').size.should > 0
41
+ end
42
+
43
+ it "should convert sinovial_DE to xml" do
44
+ sinovial_DE = File.join(YDcoxHelper::DataDir, 'Sinovial_DE.docx')
45
+ File.exists?(sinovial_DE).should be true
46
+ doc = YDocx::Document.open(sinovial_DE)
47
+ sinovial_DE_xml = sinovial_DE.sub('.docx', '.xml')
48
+ doc.to_xml(sinovial_DE_xml, {:format => :fachinfo})
49
+ out = doc.output_file('xml')
50
+ File.exists?(sinovial_DE_xml).should be true
51
+ doc.parser.lang.to_s.should == 'de'
52
+ doc = Nokogiri::XML(open(sinovial_DE_xml))
53
+ doc.xpath('//chapters/chapter[contains(heading, "Packung")]').size.should > 0
54
+ doc.xpath('//chapters/chapter[contains(heading, "Hersteller")]').size.should > 0
55
+ doc.xpath('//chapters/chapter[contains(heading, "Vertriebsfirma")]').size.should > 0
56
+ end
57
+
58
+ it "should convert sinovial_DE to html" do
59
+ sinovial_DE = File.join(YDcoxHelper::DataDir, 'Sinovial_DE.docx')
60
+ File.exists?(sinovial_DE).should be true
61
+ doc = YDocx::Document.open(sinovial_DE)
62
+ sinovial_DE_html = sinovial_DE.sub('.docx', '.html')
63
+ doc.to_html(sinovial_DE_html, {:format => :fachinfo})
64
+ out = doc.output_file('html')
65
+ File.exists?(sinovial_DE_html).should be true
66
+ end
67
+
68
+ it "should convert various pseudo fachinfo to xml" do
69
+ require 'ydocx/templates/fachinfo'
70
+ files = [ 'Sinovial_0.8_DE.docx', 'Sinovial_0.8_FR.docx',
71
+ 'Sinovial_DE.docx', 'Sinovial_FR.docx',
72
+ ]
73
+ files.each {
74
+ |file|
75
+ file_name = File.join(YDcoxHelper::DataDir, file)
76
+ File.exists?(file_name).should be true
77
+ file.match('_DE') ? lang = 'de' : lang = 'fr'
78
+ doc = YDocx::Document.open(file_name, { :lang => lang} )
79
+ file_name_xml = file_name.sub('.docx', '.xml')
80
+ doc.to_xml(file_name_xml, {:format => :fachinfo})
81
+ out = doc.output_file('xml')
82
+ File.exists?(file_name_xml).should be true
83
+ doc.parser.lang.should == lang
84
+ doc = Nokogiri::XML(open(file_name_xml))
85
+ }
86
+ end
87
+ end
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'simplecov'
5
+ if ENV['COVERAGE']
6
+ SimpleCov.start
7
+ end
8
+
9
+ require 'fileutils'
10
+ require 'pp'
11
+ require 'ydocx'
12
+
13
+ module YDcoxHelper
14
+ DataDir = File.join(File.dirname(__FILE__), '..', 'spec', 'data')
15
+ end
16
+ require 'rspec'
17
+ require 'ydocx'
metadata CHANGED
@@ -1,88 +1,64 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ydocx
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.1
5
- prerelease:
4
+ version: 1.2.6
6
5
  platform: ruby
7
6
  authors:
8
- - Yasuhiro Asaka, Zeno R.R. Davatz
7
+ - "'Yasuhiro Asak, Zeno R.R. Davatz"
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-07-11 00:00:00.000000000 Z
13
- dependencies:
14
- - !ruby/object:Gem::Dependency
15
- name: rdoc
16
- requirement: &6667440 !ruby/object:Gem::Requirement
17
- none: false
18
- requirements:
19
- - - ~>
20
- - !ruby/object:Gem::Version
21
- version: '3.10'
22
- type: :development
23
- prerelease: false
24
- version_requirements: *6667440
25
- - !ruby/object:Gem::Dependency
26
- name: hoe
27
- requirement: &6667000 !ruby/object:Gem::Requirement
28
- none: false
29
- requirements:
30
- - - ~>
31
- - !ruby/object:Gem::Version
32
- version: '2.13'
33
- type: :development
34
- prerelease: false
35
- version_requirements: *6667000
11
+ date: 2021-01-20 00:00:00.000000000 Z
12
+ dependencies: []
36
13
  description: ''
37
- email:
38
- - yasaka@ywesee.com, zdavatz@ywesee.com
14
+ email: yasaka@ywesee.com, zdavatz@ywesee.com
39
15
  executables:
40
- - docx2html
41
16
  - docx2xml
17
+ - docx2html
42
18
  extensions: []
43
- extra_rdoc_files:
44
- - History.txt
45
- - Manifest.txt
46
- - README.txt
19
+ extra_rdoc_files: []
47
20
  files:
48
21
  - History.txt
49
22
  - Manifest.txt
50
23
  - README.txt
51
- - Rakefile
52
24
  - bin/docx2html
53
25
  - bin/docx2xml
54
26
  - lib/ydocx.rb
55
27
  - lib/ydocx/builder.rb
28
+ - lib/ydocx/command.rb
56
29
  - lib/ydocx/document.rb
57
30
  - lib/ydocx/markup_method.rb
58
31
  - lib/ydocx/parser.rb
59
- - lib/ydocx/command.rb
60
32
  - lib/ydocx/templates/fachinfo.rb
61
33
  - lib/ydocx/templates/patinfo.rb
62
- homepage: https://github.com/zdavatz/ydocx
63
- licenses: []
34
+ - lib/ydocx/version.rb
35
+ - spec/data/Sinovial_0.8_DE.docx
36
+ - spec/data/Sinovial_0.8_FR.docx
37
+ - spec/data/Sinovial_DE.docx
38
+ - spec/data/Sinovial_FR.docx
39
+ - spec/lib/sinovial_spec.rb
40
+ - spec/spec_helper.rb
41
+ homepage: https://github.com/zdavatz/ydocx/
42
+ licenses:
43
+ - GPL-3.0
44
+ metadata: {}
64
45
  post_install_message:
65
- rdoc_options:
66
- - --main
67
- - README.txt
46
+ rdoc_options: []
68
47
  require_paths:
69
48
  - lib
70
49
  required_ruby_version: !ruby/object:Gem::Requirement
71
- none: false
72
50
  requirements:
73
- - - ! '>='
51
+ - - ">="
74
52
  - !ruby/object:Gem::Version
75
53
  version: '0'
76
54
  required_rubygems_version: !ruby/object:Gem::Requirement
77
- none: false
78
55
  requirements:
79
- - - ! '>='
56
+ - - ">="
80
57
  - !ruby/object:Gem::Version
81
58
  version: '0'
82
59
  requirements: []
83
- rubyforge_project: ydocx
84
- rubygems_version: 1.8.15
60
+ rubygems_version: 3.1.2
85
61
  signing_key:
86
- specification_version: 3
87
- summary: ''
62
+ specification_version: 4
63
+ summary: Parsing docx files with Ruby and output them as HTML and XML
88
64
  test_files: []
data/Rakefile DELETED
@@ -1,17 +0,0 @@
1
- # -*- ruby -*-
2
-
3
- require 'rubygems'
4
- require 'hoe'
5
-
6
- # Hoe.plugin :compiler
7
- # Hoe.plugin :gem_prelude_sucks
8
- # Hoe.plugin :inline
9
- # Hoe.plugin :minitest
10
- # Hoe.plugin :racc
11
- # Hoe.plugin :rubyforge
12
-
13
- Hoe.spec 'ydocx' do
14
-
15
- developer('Yasuhiro Asaka, Zeno R.R. Davatz', 'yasaka@ywesee.com, zdavatz@ywesee.com')
16
-
17
- end