ydocx 1.2.1 → 1.2.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/History.txt +23 -0
- data/README.txt +16 -4
- data/lib/ydocx.rb +0 -4
- data/lib/ydocx/command.rb +50 -36
- data/lib/ydocx/document.rb +6 -5
- data/lib/ydocx/parser.rb +13 -9
- data/lib/ydocx/templates/fachinfo.rb +98 -40
- data/lib/ydocx/version.rb +5 -0
- data/spec/data/Sinovial_0.8_DE.docx +0 -0
- data/spec/data/Sinovial_0.8_FR.docx +0 -0
- data/spec/data/Sinovial_DE.docx +0 -0
- data/spec/data/Sinovial_FR.docx +0 -0
- data/spec/lib/sinovial_spec.rb +87 -0
- data/spec/spec_helper.rb +17 -0
- metadata +25 -49
- data/Rakefile +0 -17
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: d110c0230d26d291bbc973e022379d669a7fa963dc0baf27fa3b0ccaaa193bab
|
4
|
+
data.tar.gz: f21bfa758230376fadeab73e75645dd06862957c0d8c7d120bd489d7ffeb9b1b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 75d8e7bed0a19192c43c4b75be52b9c5f9da1ef265052420d299b810189faf3b880cc3e5f207621695a6cd8d156c477aed00c87b6e82366c7d8dedc012411e90
|
7
|
+
data.tar.gz: 84bb26bab8bc03ea116f90f1df1708d4f646f0b0094a6e685723f2e7448e409a5db8c46845c54d44ee723fd9dd0cf866946d49e8b15f6a0a56d63c01610925cc
|
data/History.txt
CHANGED
@@ -1,3 +1,26 @@
|
|
1
|
+
=== 1.2.6 / 19.01.2021
|
2
|
+
|
3
|
+
* Updated for use with Ruby 3.0.0. RMagick -> rmagick
|
4
|
+
* Replaced hoe by bundler/gem_tasks
|
5
|
+
|
6
|
+
=== 1.2.5 / 07.07.2014
|
7
|
+
|
8
|
+
* Updated to use rubyzip >= 1.0.0
|
9
|
+
|
10
|
+
=== 1.2.4 / 26.02.2014
|
11
|
+
|
12
|
+
* Many changes by Niklaus Giger to get ready for Pseudo-Fachinformation.
|
13
|
+
|
14
|
+
=== 1.2.3 / 12.07.2012
|
15
|
+
|
16
|
+
* Updated french char handling, lang in filename
|
17
|
+
|
18
|
+
=== 1.2.2 / 12.07.2012
|
19
|
+
|
20
|
+
* Fixed typo in french
|
21
|
+
* Updated help output
|
22
|
+
* Updated help message about --lang option
|
23
|
+
|
1
24
|
=== 1.2.1 / 11.07.2012
|
2
25
|
|
3
26
|
* Updated template, option handling for extension
|
data/README.txt
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
== ydocx
|
1
|
+
== ydocx by ywesee GmbH
|
2
2
|
|
3
3
|
* https://github.com/zdavatz/ydocx
|
4
4
|
* Parses docx files with Ruby and outputs them as HTML and XML.
|
@@ -18,17 +18,29 @@
|
|
18
18
|
|
19
19
|
== Usage
|
20
20
|
|
21
|
-
* Usage:
|
21
|
+
* Usage: docx2html file [options]
|
22
22
|
-f, --format Format of style and chapter {(fi|fachinfo)|(pi|patinfo)|(pl|plain)|none}, default fachinfo.
|
23
23
|
-h, --help Display this help message.
|
24
|
+
-l, --lang Language option for templates {de|fr}
|
24
25
|
-v, --version Show version.
|
25
26
|
|
26
27
|
== Using the great libraries
|
27
28
|
|
28
|
-
* rubyzip
|
29
|
+
* rubyzip (>= 1.0.0)
|
29
30
|
* nokogiri
|
30
31
|
* htmlentities
|
31
|
-
*
|
32
|
+
* rmagick
|
33
|
+
|
34
|
+
Install them using bundle install.
|
35
|
+
|
36
|
+
Requires Ruby >= 1.9.2.
|
37
|
+
|
38
|
+
== TODO
|
39
|
+
|
40
|
+
For an unknown reason, the gem fails to properly parse the Présentation chapter of Sinovial_FR.
|
41
|
+
Failing spec is: ydocx should convert sinovial_FR to xml
|
42
|
+
Failure/Error: doc.xpath('//chapters/chapter[contains(heading, "Présentation")]').size.should > 0
|
43
|
+
|
32
44
|
|
33
45
|
== License GPLv3.0
|
34
46
|
|
data/lib/ydocx.rb
CHANGED
data/lib/ydocx/command.rb
CHANGED
@@ -6,8 +6,9 @@ require 'ydocx'
|
|
6
6
|
module YDocx
|
7
7
|
class Command
|
8
8
|
class << self
|
9
|
-
@@help = /^\-(h|\-help)$/u
|
10
9
|
@@format = /^\-(f|\-format)$/u
|
10
|
+
@@help = /^\-(h|\-help)$/u
|
11
|
+
@@lang = /^\-(l|\-lang)$/u
|
11
12
|
@@version = /^\-(v|\-version)$/u
|
12
13
|
def error(message='')
|
13
14
|
puts message
|
@@ -25,11 +26,57 @@ module YDocx
|
|
25
26
|
Usage: #{self.command} file [options]
|
26
27
|
-f, --format Format of style and chapter {(fi|fachinfo)|(pi|patinfo)|(pl|plain)|none}, default fachinfo.
|
27
28
|
-h, --help Display this help message.
|
29
|
+
-l, --lang Language option for templates {de|fr}
|
28
30
|
-v, --version Show version.
|
29
31
|
BANNER
|
30
32
|
puts banner
|
31
33
|
exit
|
32
34
|
end
|
35
|
+
def parse(action, argv)
|
36
|
+
if argv.length.odd?
|
37
|
+
self.error "#{self.command}: exit with: Invalid option"
|
38
|
+
else
|
39
|
+
args = Hash[*argv]
|
40
|
+
end
|
41
|
+
options = {}
|
42
|
+
args.each_pair do |option, value|
|
43
|
+
if option =~ @@format
|
44
|
+
case value
|
45
|
+
when 'fi', 'fachinfo'
|
46
|
+
require 'ydocx/templates/fachinfo'
|
47
|
+
options.merge!({:style => :frame}) if action == :to_html
|
48
|
+
when 'pi', 'patinfo'
|
49
|
+
require 'ydocx/templates/patinfo'
|
50
|
+
options.merge!({:style => :frame}) if action == :to_html
|
51
|
+
when 'pl', 'plain'
|
52
|
+
options.merge!({:style => true}) if action == :to_html
|
53
|
+
when 'none'
|
54
|
+
# pass
|
55
|
+
else
|
56
|
+
self.error "#{self.command}: exit with #{option}: Invalid argument"
|
57
|
+
end
|
58
|
+
elsif option =~ @@help
|
59
|
+
self.help
|
60
|
+
elsif option =~ @@lang
|
61
|
+
options.merge!({:lang => value})
|
62
|
+
elsif option.downcase =~ /\.(jpeg|jpg|png|gif)$/u and action == :to_html
|
63
|
+
# allow as default
|
64
|
+
# TODO
|
65
|
+
# refactor as normal option
|
66
|
+
# currently, support fachinfo/patinfo format only
|
67
|
+
require 'ydocx/templates/fachinfo'
|
68
|
+
options.merge!({:style => :frame})
|
69
|
+
else
|
70
|
+
self.error "#{self.command}: exit with #{option}: Unknown option"
|
71
|
+
end
|
72
|
+
end
|
73
|
+
if !args.include?('-f') and !args.include?('--format')
|
74
|
+
# default fachinfo
|
75
|
+
require 'ydocx/templates/fachinfo'
|
76
|
+
options.merge!({:style => :frame}) if action == :to_html
|
77
|
+
end
|
78
|
+
options
|
79
|
+
end
|
33
80
|
def report(action, path)
|
34
81
|
puts "#{self.command}: generated #{File.expand_path(path)}"
|
35
82
|
exit
|
@@ -48,41 +95,8 @@ Usage: #{self.command} file [options]
|
|
48
95
|
elsif !File.extname(path).match(/^\.docx$/)
|
49
96
|
self.error "#{self.command}: cannot open #{file}: Not a docx file"
|
50
97
|
else
|
51
|
-
options =
|
52
|
-
|
53
|
-
if option =~ @@format
|
54
|
-
case argv[0]
|
55
|
-
when 'fi', 'fachinfo'
|
56
|
-
require 'ydocx/templates/fachinfo'
|
57
|
-
options.merge!({:style => :frame}) if action == :to_html
|
58
|
-
when 'pi', 'patinfo'
|
59
|
-
require 'ydocx/templates/patinfo'
|
60
|
-
options.merge!({:style => :frame}) if action == :to_html
|
61
|
-
when 'pl', 'plain'
|
62
|
-
options.merge!({:style => true}) if action == :to_html
|
63
|
-
when 'none'
|
64
|
-
# pass
|
65
|
-
else
|
66
|
-
self.error "#{self.command}: exit with #{option}: Invalid argument"
|
67
|
-
end
|
68
|
-
elsif option =~ @@help
|
69
|
-
self.help
|
70
|
-
elsif option.downcase =~ /\.(jpeg|jpg|png|gif)$/u and action == :to_html
|
71
|
-
# allow as default
|
72
|
-
# TODO
|
73
|
-
# refactor as normal option
|
74
|
-
# currently, support fachinfo/patinfo format only
|
75
|
-
require 'ydocx/templates/fachinfo'
|
76
|
-
options.merge!({:style => :frame})
|
77
|
-
else
|
78
|
-
self.error "#{self.command}: exit with #{option}: Unknown option"
|
79
|
-
end
|
80
|
-
else
|
81
|
-
# default fachinfo
|
82
|
-
require 'ydocx/templates/fachinfo'
|
83
|
-
options.merge!({:style => :frame}) if action == :to_html
|
84
|
-
end
|
85
|
-
doc = YDocx::Document.open(path)
|
98
|
+
options = self.parse(action, argv)
|
99
|
+
doc = YDocx::Document.open(path, options)
|
86
100
|
doc.send(action, path, options)
|
87
101
|
ext = self.extname(action)
|
88
102
|
self.report action, doc.output_file(ext[1..-1])
|
data/lib/ydocx/document.rb
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
# encoding: utf-8
|
3
3
|
|
4
4
|
require 'pathname'
|
5
|
-
require 'zip
|
5
|
+
require 'zip'
|
6
6
|
begin
|
7
|
-
require '
|
7
|
+
require 'rmagick'
|
8
8
|
rescue LoadError
|
9
|
-
warn "Couldn't load
|
9
|
+
warn "Couldn't load rmagick: .wmf conversion off"
|
10
10
|
end
|
11
11
|
require 'ydocx/parser'
|
12
12
|
require 'ydocx/builder'
|
@@ -83,7 +83,7 @@ module YDocx
|
|
83
83
|
def create_files
|
84
84
|
files_dir = output_directory
|
85
85
|
mkdir Pathname.new(files_dir) unless files_dir.exist?
|
86
|
-
@zip = Zip::
|
86
|
+
@zip = Zip::File.open(@path.realpath)
|
87
87
|
@images.each do |image|
|
88
88
|
origin_path = Pathname.new image[:origin] # media/filename.ext
|
89
89
|
source_path = Pathname.new image[:source] # images/filename.ext
|
@@ -118,10 +118,11 @@ module YDocx
|
|
118
118
|
end
|
119
119
|
def read(file)
|
120
120
|
@path = Pathname.new file
|
121
|
-
@zip = Zip::
|
121
|
+
@zip = Zip::File.open(@path.realpath)
|
122
122
|
doc = @zip.find_entry('word/document.xml').get_input_stream
|
123
123
|
rel = @zip.find_entry('word/_rels/document.xml.rels').get_input_stream
|
124
124
|
@parser = Parser.new(doc, rel) do |parser|
|
125
|
+
parser.lang = @options[:lang] if @options[:lang]
|
125
126
|
@contents = parser.parse
|
126
127
|
@indecies = parser.indecies
|
127
128
|
@images = parser.images
|
data/lib/ydocx/parser.rb
CHANGED
@@ -8,7 +8,7 @@ require 'ydocx/markup_method'
|
|
8
8
|
module YDocx
|
9
9
|
class Parser
|
10
10
|
include MarkupMethod
|
11
|
-
attr_accessor :indecies, :images, :result, :space
|
11
|
+
attr_accessor :indecies, :images, :result, :space, :lang
|
12
12
|
def initialize(doc, rel)
|
13
13
|
@doc = Nokogiri::XML.parse(doc)
|
14
14
|
@rel = Nokogiri::XML.parse(rel)
|
@@ -55,7 +55,7 @@ module YDocx
|
|
55
55
|
if symbol
|
56
56
|
_text = ''
|
57
57
|
text.unpack('U*').each do |char|
|
58
|
-
_text <<
|
58
|
+
_text << character_replace(char.to_s(16))
|
59
59
|
end
|
60
60
|
text = _text
|
61
61
|
end
|
@@ -76,14 +76,14 @@ module YDocx
|
|
76
76
|
end
|
77
77
|
text
|
78
78
|
end
|
79
|
-
def
|
79
|
+
def character_encode(text)
|
80
80
|
text.force_encoding('utf-8')
|
81
81
|
# NOTE
|
82
82
|
# :named only for escape at Builder
|
83
83
|
text = @coder.encode(text, :named)
|
84
84
|
text
|
85
85
|
end
|
86
|
-
def
|
86
|
+
def character_replace(code)
|
87
87
|
code = '0x' + code
|
88
88
|
# NOTE
|
89
89
|
# replace with a resembling HTML character reference
|
@@ -143,6 +143,9 @@ module YDocx
|
|
143
143
|
#p "char : " + @coder.decode("&#%s;" % code.hex.to_s)
|
144
144
|
end
|
145
145
|
end
|
146
|
+
def optional_escape(text)
|
147
|
+
text
|
148
|
+
end
|
146
149
|
def parse_block(node)
|
147
150
|
nil # default no block element
|
148
151
|
end
|
@@ -155,19 +158,19 @@ module YDocx
|
|
155
158
|
ns = r.namespaces.merge additional_namespaces
|
156
159
|
[
|
157
160
|
{ # old type shape
|
158
|
-
:attr => 'id',
|
161
|
+
:attr => 'r:id',
|
159
162
|
:path => 'w:pict//v:shape//v:imagedata',
|
160
163
|
:wrap => 'w:pict//v:shape//w10:wrap',
|
161
164
|
:type => '',
|
162
165
|
},
|
163
166
|
{ # in anchor
|
164
|
-
:attr => 'embed',
|
167
|
+
:attr => 'r:embed',
|
165
168
|
:path => 'w:drawing//wp:anchor//a:graphic//a:graphicData//pic:pic//pic:blipFill//a:blip',
|
166
169
|
:wrap => 'w:drawing//wp:anchor//wp:wrapTight',
|
167
170
|
:type => 'wrapText',
|
168
171
|
},
|
169
172
|
{ # stand alone
|
170
|
-
:attr => 'embed',
|
173
|
+
:attr => 'r:embed',
|
171
174
|
:path => 'w:drawing//a:graphic//a:graphicData//pic:pic//pic:blipFill//a:blip',
|
172
175
|
:wrap => 'w:drawing//wp:wrapTight',
|
173
176
|
:type => 'wrapText',
|
@@ -229,8 +232,8 @@ module YDocx
|
|
229
232
|
end
|
230
233
|
end
|
231
234
|
unless r.xpath('w:sym').empty?
|
232
|
-
code = r.xpath('w:sym').first['char'].downcase # w:char
|
233
|
-
content <<
|
235
|
+
code = r.xpath('w:sym').first['w:char'].downcase # w:char
|
236
|
+
content << character_replace(code)
|
234
237
|
pos += 1
|
235
238
|
end
|
236
239
|
if !r.xpath('w:pict').empty? or !r.xpath('w:drawing').empty?
|
@@ -276,6 +279,7 @@ module YDocx
|
|
276
279
|
end
|
277
280
|
def parse_text(r, lstrip=false)
|
278
281
|
text = r.xpath('w:t').map(&:text).join('')
|
282
|
+
text = character_encode(text)
|
279
283
|
text = optional_escape(text)
|
280
284
|
text = text.lstrip if lstrip
|
281
285
|
if rpr = r.xpath('w:rPr')
|
@@ -6,45 +6,100 @@ require 'cgi'
|
|
6
6
|
module YDocx
|
7
7
|
class Parser
|
8
8
|
attr_accessor :code, :lang
|
9
|
+
@@figure_pattern = /‘|’|'|´/
|
9
10
|
def init
|
10
11
|
@image_path = 'image'
|
11
12
|
@code = nil
|
12
|
-
@lang
|
13
|
+
@lang ||= 'de'
|
13
14
|
end
|
14
|
-
|
15
|
+
###
|
16
|
+
# Fachinfo Chapters
|
17
|
+
# 1. name
|
18
|
+
# 2. composition
|
19
|
+
# 3. galenic form
|
20
|
+
# 4. indications
|
21
|
+
# 5. usage
|
22
|
+
# 6. contra_indications
|
23
|
+
# 7. restrictions
|
24
|
+
# 8. interactions
|
25
|
+
# 9. pregnancy
|
26
|
+
# 10. driving_ability
|
27
|
+
# 11. unwanted_effects
|
28
|
+
# 12. overdose
|
29
|
+
# 13. effects
|
30
|
+
# 14. kinetic
|
31
|
+
# 15. preclinic
|
32
|
+
# 16. other_advice
|
33
|
+
# 17. iksnr
|
34
|
+
# 18. packages
|
35
|
+
# 19. registration_owner
|
36
|
+
# 20. date
|
15
37
|
def chapters
|
16
|
-
# TODO
|
17
|
-
# Franzoesisch
|
18
38
|
chapters = {
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
+
:de => {
|
40
|
+
'name' => /^Name\s+des\s+Präparates$/u, # 1
|
41
|
+
'composition' => /^Zusammensetzung|Wirkstoffe|Hilsstoffe/u, # 2
|
42
|
+
'galenic_form' => /^Galenische\s+Form\s*(und|\/)\s*Wirkstoffmenge\s+pro\s+Einheit$/iu, # 3
|
43
|
+
'indications' => /^Indikationen(\s+|\s*(\/|und)\s*)Anwendungsmöglichkeiten$/u, # 4
|
44
|
+
'usage' => /^Dosierung\s*(\/|und)\s*Anwendung/u, # 5
|
45
|
+
'contra_indications' => /^Kontraindikationen($|\s*\(\s*absolute\s+Kontraindikationen\s*\)$)/u, # 6
|
46
|
+
'restrictions' => /^Warnhinweise\s+und\s+Vorsichtsmassnahmen($|\s*\/\s*(relative\s+Kontraindikationen|Warnhinweise\s*und\s*Vorsichtsmassnahmen)$)/u, # 7
|
47
|
+
'interactions' => /^Interaktionen$/u, # 8
|
48
|
+
'pregnancy' => /^Schwangerschaft(,\s*|\s*\/\s*|\s+und\s+)Stillzeit$/u, # 9
|
49
|
+
'driving_ability' => /^Wirkung\s+auf\s+die\s+Fahrtüchtigkeit\s+und\s+auf\s+das\s+Bedienen\s+von\s+Maschinen$/u, # 10
|
50
|
+
'unwanted_effects' => /^Unerwünschte\s+Wirkungen$/u, # 11
|
51
|
+
'overdose' => /^Überdosierung$/u, # 12
|
52
|
+
'effects' => /^Eigenschaften\s*\/\s*Wirkungen($|\s*\(\s*(ATC\-Code|Wirkungsmechanismus|Pharmakodyamik|Klinische\s+Wirksamkeit)\s*\)\s*$)/iu, # 13
|
53
|
+
'kinetic' => /^Pharmakokinetik($|\s*\((Absorption,\s*Distribution,\s*Metabolisms,\s*Elimination\s|Kinetik\s+spezieller\s+Patientengruppen)*\)$)/iu, # 14
|
54
|
+
'preclinic' => /^Präklinische\s+Daten$/u, # 15
|
55
|
+
'other_advice' => /^Sonstige\s*Hinweise($|\s*\(\s*(Inkompatibilitäten|Beeinflussung\s*diagnostischer\s*Methoden|Haltbarkeit|Besondere\s*Lagerungshinweise|Hinweise\s+für\s+die\s+Handhabung)\s*\)$)|^Remarques/u, # 16
|
56
|
+
'iksnrs' => /^Zulassungsnummer(n|:|$|\s*\(\s*Swissmedic\s*\)$)/u, # 17
|
57
|
+
'packages' => /^Packungen($|\s*\(\s*mit\s+Angabe\s+der\s+Abgabekategorie\s*\)$)/u, # 18
|
58
|
+
'registration_owner' => /^Zulassungsinhaberin($|\s*\(\s*Firma\s+und\s+Sitz\s+gemäss\s*Handelsregisterauszug\s*\))/u, # 19
|
59
|
+
'date' => /^Stand\s+der\s+Information$/iu, # 20
|
60
|
+
'fabrication' => /^Herstellerin/u,
|
61
|
+
'company' => /^Vertriebsfirma/u,
|
62
|
+
},
|
63
|
+
:fr => {
|
64
|
+
'name' => /^Nom$/u, # 1
|
65
|
+
'composition' => /^Composition$/u, # 2
|
66
|
+
'galenic_form' => /^Forme\s+galénique\s+et\s+quantité\s+de\s+principe\s+actif\s+par\s+unité|^Forme\s*gal.nique/iu, # 3
|
67
|
+
'indications' => /^Indications/u, # 4
|
68
|
+
'usage' => /^Posologiei/u, # 5
|
69
|
+
'contra_indications' => /^Contre\-indications/iu, # 6
|
70
|
+
'restrictions' => /^Mises/u, # 7
|
71
|
+
'interactions' => /^Interactions/u, # 8
|
72
|
+
'pregnancy' => /^Grossesse\s*\/\s*Allaitement/u, # 9
|
73
|
+
'driving_ability' => /^Effet\s+sur\s+l'aptitude\s+à;\s+la\s+conduite\s+et\s+l'utilisation\s+de\s+machines/u, # 10
|
74
|
+
'unwanted_effects' => /^Effets/u, # 11
|
75
|
+
'overdose' => /^Surdosage$/u, # 12
|
76
|
+
'effects' => /^Propriétés/iu, # 13
|
77
|
+
'kinetic' => /^Pharmacocinétique$/iu, # 14
|
78
|
+
'preclinic' => /^Données\s+précliniques$/u, # 15
|
79
|
+
'other_advice' => /^Remarques/u, # 16
|
80
|
+
'iksnrs' => /^Numéro\s+d'autorisation$/u, # 17
|
81
|
+
'packages' => /^Présentation/iu, # 18
|
82
|
+
'registration_owner' => /^Titulaire\s+de\s+l'autorisation$/u, # 19
|
83
|
+
'date' => /^Mise à jour/iu, # 20
|
84
|
+
'fabrication' => /^Fabricant$/u,
|
85
|
+
'company' => /^Distributeur/u,
|
86
|
+
}
|
39
87
|
}
|
88
|
+
if @lang == 'fr' || @lang == :fr
|
89
|
+
chapters[:fr]
|
90
|
+
else
|
91
|
+
chapters[:de]
|
92
|
+
end
|
40
93
|
end
|
94
|
+
private
|
41
95
|
def escape_id(text)
|
42
|
-
CGI.escape(text.
|
96
|
+
CGI.escape(text.
|
97
|
+
gsub(/&(.)uml;/, '\1e').gsub(/'/, '').gsub(/&(eacute|agrave);/, 'e').
|
98
|
+
gsub(/\s*\/\s*|\s+|\/|\-/, '_').gsub(/\./, '').downcase)
|
43
99
|
end
|
44
|
-
def parse_code(text) # swissmedic
|
45
|
-
if text =~
|
46
|
-
|
47
|
-
elsif text =~ /^\s*(\d{5})(.*|\s*)\s*\(\s*Swiss\s*medic\s*\)(\s*|.)$/iu
|
100
|
+
def parse_code(text) # swissmedic number
|
101
|
+
if text.gsub(@@figure_pattern, '') =~
|
102
|
+
/^\s*(\d{5})(.*|\s*)\s*\(\s*Swiss\s*medic\s*\)(\s*|.)$/iu
|
48
103
|
@code = "%5d" % $1
|
49
104
|
else
|
50
105
|
nil
|
@@ -54,18 +109,19 @@ module YDocx
|
|
54
109
|
return markup(:h2, text, {:id => id})
|
55
110
|
end
|
56
111
|
def parse_title(node, text)
|
57
|
-
if @indecies.empty? and !text.empty? and
|
58
|
-
(node.previous.
|
112
|
+
if @indecies.empty? and !text.empty? and node.previous and
|
113
|
+
(node.parent.previous.nil? or node.previous.inner_text.strip.empty?)
|
59
114
|
# The first line as package name
|
60
|
-
@
|
61
|
-
|
115
|
+
title = (@lang == 'fr' ? 'Titre' : 'Titel')
|
116
|
+
@indecies << {:text => title, :id => title.downcase}
|
117
|
+
return markup(:h1, text, {:id => title.downcase})
|
62
118
|
else
|
63
119
|
return nil
|
64
120
|
end
|
65
121
|
end
|
66
122
|
def parse_block(node)
|
67
123
|
text = node.inner_text.strip
|
68
|
-
text =
|
124
|
+
text = character_encode(text)
|
69
125
|
chapters.each_pair do |chapter, regexp|
|
70
126
|
if text =~ regexp
|
71
127
|
# allow without line break
|
@@ -73,13 +129,12 @@ module YDocx
|
|
73
129
|
id = escape_id(chapter)
|
74
130
|
@indecies << {:text => chapter, :id => id}
|
75
131
|
return parse_heading(text, id)
|
76
|
-
elsif parse_code(text)
|
77
|
-
return nil
|
78
132
|
end
|
79
133
|
end
|
80
134
|
if title = parse_title(node, text)
|
81
135
|
return title
|
82
136
|
end
|
137
|
+
parse_code(text)
|
83
138
|
return nil
|
84
139
|
end
|
85
140
|
end
|
@@ -177,11 +232,13 @@ div#container {
|
|
177
232
|
end
|
178
233
|
style.gsub(/\s\s+|\n/, ' ')
|
179
234
|
end
|
180
|
-
def resolve_path(path)
|
235
|
+
def resolve_path(path) # image src
|
181
236
|
if reference = @references.shift
|
182
237
|
File.dirname(path) + '/' + reference.basename.to_s
|
183
|
-
|
238
|
+
elsif @files.to_s =~ /\d{5}/
|
184
239
|
path
|
240
|
+
else
|
241
|
+
@files.join path
|
185
242
|
end
|
186
243
|
end
|
187
244
|
end
|
@@ -202,10 +259,11 @@ div#container {
|
|
202
259
|
end
|
203
260
|
@files
|
204
261
|
end
|
205
|
-
def output_file(ext)
|
262
|
+
def output_file(ext) # html
|
263
|
+
lang = (@parser.lang.downcase == 'fr' ? 'fr' : 'de')
|
206
264
|
if @parser.code
|
207
265
|
filename = @parser.code
|
208
|
-
output_directory.join "#{filename}.#{ext.to_s}"
|
266
|
+
output_directory.join "#{lang}_#{filename}.#{ext.to_s}"
|
209
267
|
else # default
|
210
268
|
@path.sub_ext(".#{ext.to_s}")
|
211
269
|
end
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,87 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.configure do |config|
|
6
|
+
config.expect_with(:rspec) { |c| c.syntax = :should }
|
7
|
+
end
|
8
|
+
|
9
|
+
describe "ydocx" do
|
10
|
+
|
11
|
+
before :all do
|
12
|
+
end
|
13
|
+
|
14
|
+
before :each do
|
15
|
+
Dir.glob("#{YDcoxHelper::DataDir}/*.xml").each { |file| FileUtils.rm_f(file, :verbose => $VERBOSE) }
|
16
|
+
Dir.glob("#{YDcoxHelper::DataDir}/*.html").each { |file| FileUtils.rm_f(file, :verbose => $VERBOSE) }
|
17
|
+
end
|
18
|
+
|
19
|
+
after :each do
|
20
|
+
end
|
21
|
+
|
22
|
+
after :all do
|
23
|
+
Dir.glob("#{YDcoxHelper::DataDir}/*.xml").each { |file| FileUtils.rm_f(file, :verbose => $VERBOSE) }
|
24
|
+
Dir.glob("#{YDcoxHelper::DataDir}/*.html").each { |file| FileUtils.rm_f(file, :verbose => $VERBOSE) }
|
25
|
+
end
|
26
|
+
|
27
|
+
it "should convert sinovial_FR to xml" do
|
28
|
+
require 'ydocx/templates/fachinfo'
|
29
|
+
sinovial_FR = File.join(YDcoxHelper::DataDir, 'Sinovial_FR.docx')
|
30
|
+
File.exists?(sinovial_FR).should be true
|
31
|
+
doc = YDocx::Document.open(sinovial_FR, { :lang => :fr})
|
32
|
+
sinovial_FR_xml = sinovial_FR.sub('.docx', '.xml')
|
33
|
+
doc.to_xml(sinovial_FR_xml, {:format => :fachinfo})
|
34
|
+
out = doc.output_file('xml')
|
35
|
+
File.exists?(sinovial_FR_xml).should be true
|
36
|
+
doc.parser.lang.to_s.should == 'fr'
|
37
|
+
doc = Nokogiri::XML(open(sinovial_FR_xml))
|
38
|
+
doc.xpath('//chapters/chapter[contains(heading, "Fabricant")]').size.should > 0
|
39
|
+
doc.xpath('//chapters/chapter[contains(heading, "Distributeur")]').size.should > 0
|
40
|
+
doc.xpath('//chapters/chapter[contains(heading, "Remarques particulières")]').size.should > 0
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should convert sinovial_DE to xml" do
|
44
|
+
sinovial_DE = File.join(YDcoxHelper::DataDir, 'Sinovial_DE.docx')
|
45
|
+
File.exists?(sinovial_DE).should be true
|
46
|
+
doc = YDocx::Document.open(sinovial_DE)
|
47
|
+
sinovial_DE_xml = sinovial_DE.sub('.docx', '.xml')
|
48
|
+
doc.to_xml(sinovial_DE_xml, {:format => :fachinfo})
|
49
|
+
out = doc.output_file('xml')
|
50
|
+
File.exists?(sinovial_DE_xml).should be true
|
51
|
+
doc.parser.lang.to_s.should == 'de'
|
52
|
+
doc = Nokogiri::XML(open(sinovial_DE_xml))
|
53
|
+
doc.xpath('//chapters/chapter[contains(heading, "Packung")]').size.should > 0
|
54
|
+
doc.xpath('//chapters/chapter[contains(heading, "Hersteller")]').size.should > 0
|
55
|
+
doc.xpath('//chapters/chapter[contains(heading, "Vertriebsfirma")]').size.should > 0
|
56
|
+
end
|
57
|
+
|
58
|
+
it "should convert sinovial_DE to html" do
|
59
|
+
sinovial_DE = File.join(YDcoxHelper::DataDir, 'Sinovial_DE.docx')
|
60
|
+
File.exists?(sinovial_DE).should be true
|
61
|
+
doc = YDocx::Document.open(sinovial_DE)
|
62
|
+
sinovial_DE_html = sinovial_DE.sub('.docx', '.html')
|
63
|
+
doc.to_html(sinovial_DE_html, {:format => :fachinfo})
|
64
|
+
out = doc.output_file('html')
|
65
|
+
File.exists?(sinovial_DE_html).should be true
|
66
|
+
end
|
67
|
+
|
68
|
+
it "should convert various pseudo fachinfo to xml" do
|
69
|
+
require 'ydocx/templates/fachinfo'
|
70
|
+
files = [ 'Sinovial_0.8_DE.docx', 'Sinovial_0.8_FR.docx',
|
71
|
+
'Sinovial_DE.docx', 'Sinovial_FR.docx',
|
72
|
+
]
|
73
|
+
files.each {
|
74
|
+
|file|
|
75
|
+
file_name = File.join(YDcoxHelper::DataDir, file)
|
76
|
+
File.exists?(file_name).should be true
|
77
|
+
file.match('_DE') ? lang = 'de' : lang = 'fr'
|
78
|
+
doc = YDocx::Document.open(file_name, { :lang => lang} )
|
79
|
+
file_name_xml = file_name.sub('.docx', '.xml')
|
80
|
+
doc.to_xml(file_name_xml, {:format => :fachinfo})
|
81
|
+
out = doc.output_file('xml')
|
82
|
+
File.exists?(file_name_xml).should be true
|
83
|
+
doc.parser.lang.should == lang
|
84
|
+
doc = Nokogiri::XML(open(file_name_xml))
|
85
|
+
}
|
86
|
+
end
|
87
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'simplecov'
|
5
|
+
if ENV['COVERAGE']
|
6
|
+
SimpleCov.start
|
7
|
+
end
|
8
|
+
|
9
|
+
require 'fileutils'
|
10
|
+
require 'pp'
|
11
|
+
require 'ydocx'
|
12
|
+
|
13
|
+
module YDcoxHelper
|
14
|
+
DataDir = File.join(File.dirname(__FILE__), '..', 'spec', 'data')
|
15
|
+
end
|
16
|
+
require 'rspec'
|
17
|
+
require 'ydocx'
|
metadata
CHANGED
@@ -1,88 +1,64 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ydocx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.
|
5
|
-
prerelease:
|
4
|
+
version: 1.2.6
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
|
-
- Yasuhiro
|
7
|
+
- "'Yasuhiro Asak, Zeno R.R. Davatz"
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
13
|
-
dependencies:
|
14
|
-
- !ruby/object:Gem::Dependency
|
15
|
-
name: rdoc
|
16
|
-
requirement: &6667440 !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
|
-
requirements:
|
19
|
-
- - ~>
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version: '3.10'
|
22
|
-
type: :development
|
23
|
-
prerelease: false
|
24
|
-
version_requirements: *6667440
|
25
|
-
- !ruby/object:Gem::Dependency
|
26
|
-
name: hoe
|
27
|
-
requirement: &6667000 !ruby/object:Gem::Requirement
|
28
|
-
none: false
|
29
|
-
requirements:
|
30
|
-
- - ~>
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
version: '2.13'
|
33
|
-
type: :development
|
34
|
-
prerelease: false
|
35
|
-
version_requirements: *6667000
|
11
|
+
date: 2021-01-20 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
36
13
|
description: ''
|
37
|
-
email:
|
38
|
-
- yasaka@ywesee.com, zdavatz@ywesee.com
|
14
|
+
email: yasaka@ywesee.com, zdavatz@ywesee.com
|
39
15
|
executables:
|
40
|
-
- docx2html
|
41
16
|
- docx2xml
|
17
|
+
- docx2html
|
42
18
|
extensions: []
|
43
|
-
extra_rdoc_files:
|
44
|
-
- History.txt
|
45
|
-
- Manifest.txt
|
46
|
-
- README.txt
|
19
|
+
extra_rdoc_files: []
|
47
20
|
files:
|
48
21
|
- History.txt
|
49
22
|
- Manifest.txt
|
50
23
|
- README.txt
|
51
|
-
- Rakefile
|
52
24
|
- bin/docx2html
|
53
25
|
- bin/docx2xml
|
54
26
|
- lib/ydocx.rb
|
55
27
|
- lib/ydocx/builder.rb
|
28
|
+
- lib/ydocx/command.rb
|
56
29
|
- lib/ydocx/document.rb
|
57
30
|
- lib/ydocx/markup_method.rb
|
58
31
|
- lib/ydocx/parser.rb
|
59
|
-
- lib/ydocx/command.rb
|
60
32
|
- lib/ydocx/templates/fachinfo.rb
|
61
33
|
- lib/ydocx/templates/patinfo.rb
|
62
|
-
|
63
|
-
|
34
|
+
- lib/ydocx/version.rb
|
35
|
+
- spec/data/Sinovial_0.8_DE.docx
|
36
|
+
- spec/data/Sinovial_0.8_FR.docx
|
37
|
+
- spec/data/Sinovial_DE.docx
|
38
|
+
- spec/data/Sinovial_FR.docx
|
39
|
+
- spec/lib/sinovial_spec.rb
|
40
|
+
- spec/spec_helper.rb
|
41
|
+
homepage: https://github.com/zdavatz/ydocx/
|
42
|
+
licenses:
|
43
|
+
- GPL-3.0
|
44
|
+
metadata: {}
|
64
45
|
post_install_message:
|
65
|
-
rdoc_options:
|
66
|
-
- --main
|
67
|
-
- README.txt
|
46
|
+
rdoc_options: []
|
68
47
|
require_paths:
|
69
48
|
- lib
|
70
49
|
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
-
none: false
|
72
50
|
requirements:
|
73
|
-
- -
|
51
|
+
- - ">="
|
74
52
|
- !ruby/object:Gem::Version
|
75
53
|
version: '0'
|
76
54
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
-
none: false
|
78
55
|
requirements:
|
79
|
-
- -
|
56
|
+
- - ">="
|
80
57
|
- !ruby/object:Gem::Version
|
81
58
|
version: '0'
|
82
59
|
requirements: []
|
83
|
-
|
84
|
-
rubygems_version: 1.8.15
|
60
|
+
rubygems_version: 3.1.2
|
85
61
|
signing_key:
|
86
|
-
specification_version:
|
87
|
-
summary:
|
62
|
+
specification_version: 4
|
63
|
+
summary: Parsing docx files with Ruby and output them as HTML and XML
|
88
64
|
test_files: []
|
data/Rakefile
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
# -*- ruby -*-
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'hoe'
|
5
|
-
|
6
|
-
# Hoe.plugin :compiler
|
7
|
-
# Hoe.plugin :gem_prelude_sucks
|
8
|
-
# Hoe.plugin :inline
|
9
|
-
# Hoe.plugin :minitest
|
10
|
-
# Hoe.plugin :racc
|
11
|
-
# Hoe.plugin :rubyforge
|
12
|
-
|
13
|
-
Hoe.spec 'ydocx' do
|
14
|
-
|
15
|
-
developer('Yasuhiro Asaka, Zeno R.R. Davatz', 'yasaka@ywesee.com, zdavatz@ywesee.com')
|
16
|
-
|
17
|
-
end
|