ydocx 1.2.1 → 1.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/History.txt +23 -0
- data/README.txt +16 -4
- data/lib/ydocx.rb +0 -4
- data/lib/ydocx/command.rb +50 -36
- data/lib/ydocx/document.rb +6 -5
- data/lib/ydocx/parser.rb +13 -9
- data/lib/ydocx/templates/fachinfo.rb +98 -40
- data/lib/ydocx/version.rb +5 -0
- data/spec/data/Sinovial_0.8_DE.docx +0 -0
- data/spec/data/Sinovial_0.8_FR.docx +0 -0
- data/spec/data/Sinovial_DE.docx +0 -0
- data/spec/data/Sinovial_FR.docx +0 -0
- data/spec/lib/sinovial_spec.rb +87 -0
- data/spec/spec_helper.rb +17 -0
- metadata +25 -49
- data/Rakefile +0 -17
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: d110c0230d26d291bbc973e022379d669a7fa963dc0baf27fa3b0ccaaa193bab
|
4
|
+
data.tar.gz: f21bfa758230376fadeab73e75645dd06862957c0d8c7d120bd489d7ffeb9b1b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 75d8e7bed0a19192c43c4b75be52b9c5f9da1ef265052420d299b810189faf3b880cc3e5f207621695a6cd8d156c477aed00c87b6e82366c7d8dedc012411e90
|
7
|
+
data.tar.gz: 84bb26bab8bc03ea116f90f1df1708d4f646f0b0094a6e685723f2e7448e409a5db8c46845c54d44ee723fd9dd0cf866946d49e8b15f6a0a56d63c01610925cc
|
data/History.txt
CHANGED
@@ -1,3 +1,26 @@
|
|
1
|
+
=== 1.2.6 / 19.01.2021
|
2
|
+
|
3
|
+
* Updated for use with Ruby 3.0.0. RMagick -> rmagick
|
4
|
+
* Replaced hoe by bundler/gem_tasks
|
5
|
+
|
6
|
+
=== 1.2.5 / 07.07.2014
|
7
|
+
|
8
|
+
* Updated to use rubyzip >= 1.0.0
|
9
|
+
|
10
|
+
=== 1.2.4 / 26.02.2014
|
11
|
+
|
12
|
+
* Many changes by Niklaus Giger to get ready for Pseudo-Fachinformation.
|
13
|
+
|
14
|
+
=== 1.2.3 / 12.07.2012
|
15
|
+
|
16
|
+
* Updated french char handling, lang in filename
|
17
|
+
|
18
|
+
=== 1.2.2 / 12.07.2012
|
19
|
+
|
20
|
+
* Fixed typo in french
|
21
|
+
* Updated help output
|
22
|
+
* Updated help message about --lang option
|
23
|
+
|
1
24
|
=== 1.2.1 / 11.07.2012
|
2
25
|
|
3
26
|
* Updated template, option handling for extension
|
data/README.txt
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
== ydocx
|
1
|
+
== ydocx by ywesee GmbH
|
2
2
|
|
3
3
|
* https://github.com/zdavatz/ydocx
|
4
4
|
* Parses docx files with Ruby and outputs them as HTML and XML.
|
@@ -18,17 +18,29 @@
|
|
18
18
|
|
19
19
|
== Usage
|
20
20
|
|
21
|
-
* Usage:
|
21
|
+
* Usage: docx2html file [options]
|
22
22
|
-f, --format Format of style and chapter {(fi|fachinfo)|(pi|patinfo)|(pl|plain)|none}, default fachinfo.
|
23
23
|
-h, --help Display this help message.
|
24
|
+
-l, --lang Language option for templates {de|fr}
|
24
25
|
-v, --version Show version.
|
25
26
|
|
26
27
|
== Using the great libraries
|
27
28
|
|
28
|
-
* rubyzip
|
29
|
+
* rubyzip (>= 1.0.0)
|
29
30
|
* nokogiri
|
30
31
|
* htmlentities
|
31
|
-
*
|
32
|
+
* rmagick
|
33
|
+
|
34
|
+
Install them using bundle install.
|
35
|
+
|
36
|
+
Requires Ruby >= 1.9.2.
|
37
|
+
|
38
|
+
== TODO
|
39
|
+
|
40
|
+
For an unknown reason, the gem fails to properly parse the Présentation chapter for sinovial_fr.
|
41
|
+
Failing spec is: ydocx should convert sinovial_FR to xml
|
42
|
+
Failure/Error: doc.xpath('//chapters/chapter[contains(heading, "Présentation")]').size.should > 0
|
43
|
+
|
32
44
|
|
33
45
|
== License GPLv3.0
|
34
46
|
|
data/lib/ydocx.rb
CHANGED
data/lib/ydocx/command.rb
CHANGED
@@ -6,8 +6,9 @@ require 'ydocx'
|
|
6
6
|
module YDocx
|
7
7
|
class Command
|
8
8
|
class << self
|
9
|
-
@@help = /^\-(h|\-help)$/u
|
10
9
|
@@format = /^\-(f|\-format)$/u
|
10
|
+
@@help = /^\-(h|\-help)$/u
|
11
|
+
@@lang = /^\-(l|\-lang)$/u
|
11
12
|
@@version = /^\-(v|\-version)$/u
|
12
13
|
def error(message='')
|
13
14
|
puts message
|
@@ -25,11 +26,57 @@ module YDocx
|
|
25
26
|
Usage: #{self.command} file [options]
|
26
27
|
-f, --format Format of style and chapter {(fi|fachinfo)|(pi|patinfo)|(pl|plain)|none}, default fachinfo.
|
27
28
|
-h, --help Display this help message.
|
29
|
+
-l, --lang Language option for templates {de|fr}
|
28
30
|
-v, --version Show version.
|
29
31
|
BANNER
|
30
32
|
puts banner
|
31
33
|
exit
|
32
34
|
end
|
35
|
+
def parse(action, argv)
|
36
|
+
if argv.length.odd?
|
37
|
+
self.error "#{self.command}: exit with: Invalid option"
|
38
|
+
else
|
39
|
+
args = Hash[*argv]
|
40
|
+
end
|
41
|
+
options = {}
|
42
|
+
args.each_pair do |option, value|
|
43
|
+
if option =~ @@format
|
44
|
+
case value
|
45
|
+
when 'fi', 'fachinfo'
|
46
|
+
require 'ydocx/templates/fachinfo'
|
47
|
+
options.merge!({:style => :frame}) if action == :to_html
|
48
|
+
when 'pi', 'patinfo'
|
49
|
+
require 'ydocx/templates/patinfo'
|
50
|
+
options.merge!({:style => :frame}) if action == :to_html
|
51
|
+
when 'pl', 'plain'
|
52
|
+
options.merge!({:style => true}) if action == :to_html
|
53
|
+
when 'none'
|
54
|
+
# pass
|
55
|
+
else
|
56
|
+
self.error "#{self.command}: exit with #{option}: Invalid argument"
|
57
|
+
end
|
58
|
+
elsif option =~ @@help
|
59
|
+
self.help
|
60
|
+
elsif option =~ @@lang
|
61
|
+
options.merge!({:lang => value})
|
62
|
+
elsif option.downcase =~ /\.(jpeg|jpg|png|gif)$/u and action == :to_html
|
63
|
+
# allow as default
|
64
|
+
# TODO
|
65
|
+
# refactor as normal option
|
66
|
+
# currently, support fachinfo/patinfo format only
|
67
|
+
require 'ydocx/templates/fachinfo'
|
68
|
+
options.merge!({:style => :frame})
|
69
|
+
else
|
70
|
+
self.error "#{self.command}: exit with #{option}: Unknown option"
|
71
|
+
end
|
72
|
+
end
|
73
|
+
if !args.include?('-f') and !args.include?('--format')
|
74
|
+
# default fachinfo
|
75
|
+
require 'ydocx/templates/fachinfo'
|
76
|
+
options.merge!({:style => :frame}) if action == :to_html
|
77
|
+
end
|
78
|
+
options
|
79
|
+
end
|
33
80
|
def report(action, path)
|
34
81
|
puts "#{self.command}: generated #{File.expand_path(path)}"
|
35
82
|
exit
|
@@ -48,41 +95,8 @@ Usage: #{self.command} file [options]
|
|
48
95
|
elsif !File.extname(path).match(/^\.docx$/)
|
49
96
|
self.error "#{self.command}: cannot open #{file}: Not a docx file"
|
50
97
|
else
|
51
|
-
options =
|
52
|
-
|
53
|
-
if option =~ @@format
|
54
|
-
case argv[0]
|
55
|
-
when 'fi', 'fachinfo'
|
56
|
-
require 'ydocx/templates/fachinfo'
|
57
|
-
options.merge!({:style => :frame}) if action == :to_html
|
58
|
-
when 'pi', 'patinfo'
|
59
|
-
require 'ydocx/templates/patinfo'
|
60
|
-
options.merge!({:style => :frame}) if action == :to_html
|
61
|
-
when 'pl', 'plain'
|
62
|
-
options.merge!({:style => true}) if action == :to_html
|
63
|
-
when 'none'
|
64
|
-
# pass
|
65
|
-
else
|
66
|
-
self.error "#{self.command}: exit with #{option}: Invalid argument"
|
67
|
-
end
|
68
|
-
elsif option =~ @@help
|
69
|
-
self.help
|
70
|
-
elsif option.downcase =~ /\.(jpeg|jpg|png|gif)$/u and action == :to_html
|
71
|
-
# allow as default
|
72
|
-
# TODO
|
73
|
-
# refactor as normal option
|
74
|
-
# currently, support fachinfo/patinfo format only
|
75
|
-
require 'ydocx/templates/fachinfo'
|
76
|
-
options.merge!({:style => :frame})
|
77
|
-
else
|
78
|
-
self.error "#{self.command}: exit with #{option}: Unknown option"
|
79
|
-
end
|
80
|
-
else
|
81
|
-
# default fachinfo
|
82
|
-
require 'ydocx/templates/fachinfo'
|
83
|
-
options.merge!({:style => :frame}) if action == :to_html
|
84
|
-
end
|
85
|
-
doc = YDocx::Document.open(path)
|
98
|
+
options = self.parse(action, argv)
|
99
|
+
doc = YDocx::Document.open(path, options)
|
86
100
|
doc.send(action, path, options)
|
87
101
|
ext = self.extname(action)
|
88
102
|
self.report action, doc.output_file(ext[1..-1])
|
data/lib/ydocx/document.rb
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
# encoding: utf-8
|
3
3
|
|
4
4
|
require 'pathname'
|
5
|
-
require 'zip
|
5
|
+
require 'zip'
|
6
6
|
begin
|
7
|
-
require '
|
7
|
+
require 'rmagick'
|
8
8
|
rescue LoadError
|
9
|
-
warn "Couldn't load
|
9
|
+
warn "Couldn't load rmagick: .wmf conversion off"
|
10
10
|
end
|
11
11
|
require 'ydocx/parser'
|
12
12
|
require 'ydocx/builder'
|
@@ -83,7 +83,7 @@ module YDocx
|
|
83
83
|
def create_files
|
84
84
|
files_dir = output_directory
|
85
85
|
mkdir Pathname.new(files_dir) unless files_dir.exist?
|
86
|
-
@zip = Zip::
|
86
|
+
@zip = Zip::File.open(@path.realpath)
|
87
87
|
@images.each do |image|
|
88
88
|
origin_path = Pathname.new image[:origin] # media/filename.ext
|
89
89
|
source_path = Pathname.new image[:source] # images/filename.ext
|
@@ -118,10 +118,11 @@ module YDocx
|
|
118
118
|
end
|
119
119
|
def read(file)
|
120
120
|
@path = Pathname.new file
|
121
|
-
@zip = Zip::
|
121
|
+
@zip = Zip::File.open(@path.realpath)
|
122
122
|
doc = @zip.find_entry('word/document.xml').get_input_stream
|
123
123
|
rel = @zip.find_entry('word/_rels/document.xml.rels').get_input_stream
|
124
124
|
@parser = Parser.new(doc, rel) do |parser|
|
125
|
+
parser.lang = @options[:lang] if @options[:lang]
|
125
126
|
@contents = parser.parse
|
126
127
|
@indecies = parser.indecies
|
127
128
|
@images = parser.images
|
data/lib/ydocx/parser.rb
CHANGED
@@ -8,7 +8,7 @@ require 'ydocx/markup_method'
|
|
8
8
|
module YDocx
|
9
9
|
class Parser
|
10
10
|
include MarkupMethod
|
11
|
-
attr_accessor :indecies, :images, :result, :space
|
11
|
+
attr_accessor :indecies, :images, :result, :space, :lang
|
12
12
|
def initialize(doc, rel)
|
13
13
|
@doc = Nokogiri::XML.parse(doc)
|
14
14
|
@rel = Nokogiri::XML.parse(rel)
|
@@ -55,7 +55,7 @@ module YDocx
|
|
55
55
|
if symbol
|
56
56
|
_text = ''
|
57
57
|
text.unpack('U*').each do |char|
|
58
|
-
_text <<
|
58
|
+
_text << character_replace(char.to_s(16))
|
59
59
|
end
|
60
60
|
text = _text
|
61
61
|
end
|
@@ -76,14 +76,14 @@ module YDocx
|
|
76
76
|
end
|
77
77
|
text
|
78
78
|
end
|
79
|
-
def
|
79
|
+
def character_encode(text)
|
80
80
|
text.force_encoding('utf-8')
|
81
81
|
# NOTE
|
82
82
|
# :named only for escape at Builder
|
83
83
|
text = @coder.encode(text, :named)
|
84
84
|
text
|
85
85
|
end
|
86
|
-
def
|
86
|
+
def character_replace(code)
|
87
87
|
code = '0x' + code
|
88
88
|
# NOTE
|
89
89
|
# replace with a resembling HTML character reference
|
@@ -143,6 +143,9 @@ module YDocx
|
|
143
143
|
#p "char : " + @coder.decode("&#%s;" % code.hex.to_s)
|
144
144
|
end
|
145
145
|
end
|
146
|
+
def optional_escape(text)
|
147
|
+
text
|
148
|
+
end
|
146
149
|
def parse_block(node)
|
147
150
|
nil # default no block element
|
148
151
|
end
|
@@ -155,19 +158,19 @@ module YDocx
|
|
155
158
|
ns = r.namespaces.merge additional_namespaces
|
156
159
|
[
|
157
160
|
{ # old type shape
|
158
|
-
:attr => 'id',
|
161
|
+
:attr => 'r:id',
|
159
162
|
:path => 'w:pict//v:shape//v:imagedata',
|
160
163
|
:wrap => 'w:pict//v:shape//w10:wrap',
|
161
164
|
:type => '',
|
162
165
|
},
|
163
166
|
{ # in anchor
|
164
|
-
:attr => 'embed',
|
167
|
+
:attr => 'r:embed',
|
165
168
|
:path => 'w:drawing//wp:anchor//a:graphic//a:graphicData//pic:pic//pic:blipFill//a:blip',
|
166
169
|
:wrap => 'w:drawing//wp:anchor//wp:wrapTight',
|
167
170
|
:type => 'wrapText',
|
168
171
|
},
|
169
172
|
{ # stand alone
|
170
|
-
:attr => 'embed',
|
173
|
+
:attr => 'r:embed',
|
171
174
|
:path => 'w:drawing//a:graphic//a:graphicData//pic:pic//pic:blipFill//a:blip',
|
172
175
|
:wrap => 'w:drawing//wp:wrapTight',
|
173
176
|
:type => 'wrapText',
|
@@ -229,8 +232,8 @@ module YDocx
|
|
229
232
|
end
|
230
233
|
end
|
231
234
|
unless r.xpath('w:sym').empty?
|
232
|
-
code = r.xpath('w:sym').first['char'].downcase # w:char
|
233
|
-
content <<
|
235
|
+
code = r.xpath('w:sym').first['w:char'].downcase # w:char
|
236
|
+
content << character_replace(code)
|
234
237
|
pos += 1
|
235
238
|
end
|
236
239
|
if !r.xpath('w:pict').empty? or !r.xpath('w:drawing').empty?
|
@@ -276,6 +279,7 @@ module YDocx
|
|
276
279
|
end
|
277
280
|
def parse_text(r, lstrip=false)
|
278
281
|
text = r.xpath('w:t').map(&:text).join('')
|
282
|
+
text = character_encode(text)
|
279
283
|
text = optional_escape(text)
|
280
284
|
text = text.lstrip if lstrip
|
281
285
|
if rpr = r.xpath('w:rPr')
|
@@ -6,45 +6,100 @@ require 'cgi'
|
|
6
6
|
module YDocx
|
7
7
|
class Parser
|
8
8
|
attr_accessor :code, :lang
|
9
|
+
@@figure_pattern = /‘|’|'|´/
|
9
10
|
def init
|
10
11
|
@image_path = 'image'
|
11
12
|
@code = nil
|
12
|
-
@lang
|
13
|
+
@lang ||= 'de'
|
13
14
|
end
|
14
|
-
|
15
|
+
###
|
16
|
+
# Fachinfo Chapters
|
17
|
+
# 1. name
|
18
|
+
# 2. composition
|
19
|
+
# 3. galenic form
|
20
|
+
# 4. indications
|
21
|
+
# 5. usage
|
22
|
+
# 6. contra_indications
|
23
|
+
# 7. restrictions
|
24
|
+
# 8. interactions
|
25
|
+
# 9. pregnancy
|
26
|
+
# 10. driving_ability
|
27
|
+
# 11. unwanted_effects
|
28
|
+
# 12. overdose
|
29
|
+
# 13. effects
|
30
|
+
# 14. kinetic
|
31
|
+
# 15. preclinic
|
32
|
+
# 16. other_advice
|
33
|
+
# 17. iksnr
|
34
|
+
# 18. packages
|
35
|
+
# 19. registration_owner
|
36
|
+
# 20. date
|
15
37
|
def chapters
|
16
|
-
# TODO
|
17
|
-
# Franzoesisch
|
18
38
|
chapters = {
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
+
:de => {
|
40
|
+
'name' => /^Name\s+des\s+Präparates$/u, # 1
|
41
|
+
'composition' => /^Zusammensetzung|Wirkstoffe|Hilsstoffe/u, # 2
|
42
|
+
'galenic_form' => /^Galenische\s+Form\s*(und|\/)\s*Wirkstoffmenge\s+pro\s+Einheit$/iu, # 3
|
43
|
+
'indications' => /^Indikationen(\s+|\s*(\/|und)\s*)Anwendungsmöglichkeiten$/u, # 4
|
44
|
+
'usage' => /^Dosierung\s*(\/|und)\s*Anwendung/u, # 5
|
45
|
+
'contra_indications' => /^Kontraindikationen($|\s*\(\s*absolute\s+Kontraindikationen\s*\)$)/u, # 6
|
46
|
+
'restrictions' => /^Warnhinweise\s+und\s+Vorsichtsmassnahmen($|\s*\/\s*(relative\s+Kontraindikationen|Warnhinweise\s*und\s*Vorsichtsmassnahmen)$)/u, # 7
|
47
|
+
'interactions' => /^Interaktionen$/u, # 8
|
48
|
+
'pregnancy' => /^Schwangerschaft(,\s*|\s*\/\s*|\s+und\s+)Stillzeit$/u, # 9
|
49
|
+
'driving_ability' => /^Wirkung\s+auf\s+die\s+Fahrtüchtigkeit\s+und\s+auf\s+das\s+Bedienen\s+von\s+Maschinen$/u, # 10
|
50
|
+
'unwanted_effects' => /^Unerwünschte\s+Wirkungen$/u, # 11
|
51
|
+
'overdose' => /^Überdosierung$/u, # 12
|
52
|
+
'effects' => /^Eigenschaften\s*\/\s*Wirkungen($|\s*\(\s*(ATC\-Code|Wirkungsmechanismus|Pharmakodyamik|Klinische\s+Wirksamkeit)\s*\)\s*$)/iu, # 13
|
53
|
+
'kinetic' => /^Pharmakokinetik($|\s*\((Absorption,\s*Distribution,\s*Metabolisms,\s*Elimination\s|Kinetik\s+spezieller\s+Patientengruppen)*\)$)/iu, # 14
|
54
|
+
'preclinic' => /^Präklinische\s+Daten$/u, # 15
|
55
|
+
'other_advice' => /^Sonstige\s*Hinweise($|\s*\(\s*(Inkompatibilitäten|Beeinflussung\s*diagnostischer\s*Methoden|Haltbarkeit|Besondere\s*Lagerungshinweise|Hinweise\s+für\s+die\s+Handhabung)\s*\)$)|^Remarques/u, # 16
|
56
|
+
'iksnrs' => /^Zulassungsnummer(n|:|$|\s*\(\s*Swissmedic\s*\)$)/u, # 17
|
57
|
+
'packages' => /^Packungen($|\s*\(\s*mit\s+Angabe\s+der\s+Abgabekategorie\s*\)$)/u, # 18
|
58
|
+
'registration_owner' => /^Zulassungsinhaberin($|\s*\(\s*Firma\s+und\s+Sitz\s+gemäss\s*Handelsregisterauszug\s*\))/u, # 19
|
59
|
+
'date' => /^Stand\s+der\s+Information$/iu, # 20
|
60
|
+
'fabrication' => /^Herstellerin/u,
|
61
|
+
'company' => /^Vertriebsfirma/u,
|
62
|
+
},
|
63
|
+
:fr => {
|
64
|
+
'name' => /^Nom$/u, # 1
|
65
|
+
'composition' => /^Composition$/u, # 2
|
66
|
+
'galenic_form' => /^Forme\s+galénique\s+et\s+quantité\s+de\s+principe\s+actif\s+par\s+unité|^Forme\s*gal.nique/iu, # 3
|
67
|
+
'indications' => /^Indications/u, # 4
|
68
|
+
'usage' => /^Posologiei/u, # 5
|
69
|
+
'contra_indications' => /^Contre\-indications/iu, # 6
|
70
|
+
'restrictions' => /^Mises/u, # 7
|
71
|
+
'interactions' => /^Interactions/u, # 8
|
72
|
+
'pregnancy' => /^Grossesse\s*\/\s*Allaitement/u, # 9
|
73
|
+
'driving_ability' => /^Effet\s+sur\s+l'aptitude\s+à;\s+la\s+conduite\s+et\s+l'utilisation\s+de\s+machines/u, # 10
|
74
|
+
'unwanted_effects' => /^Effets/u, # 11
|
75
|
+
'overdose' => /^Surdosage$/u, # 12
|
76
|
+
'effects' => /^Propriétés/iu, # 13
|
77
|
+
'kinetic' => /^Pharmacocinétique$/iu, # 14
|
78
|
+
'preclinic' => /^Données\s+précliniques$/u, # 15
|
79
|
+
'other_advice' => /^Remarques/u, # 16
|
80
|
+
'iksnrs' => /^Numéro\s+d'autorisation$/u, # 17
|
81
|
+
'packages' => /^Présentation/iu, # 18
|
82
|
+
'registration_owner' => /^Titulaire\s+de\s+l'autorisation$/u, # 19
|
83
|
+
'date' => /^Mise à jour/iu, # 20
|
84
|
+
'fabrication' => /^Fabricant$/u,
|
85
|
+
'company' => /^Distributeur/u,
|
86
|
+
}
|
39
87
|
}
|
88
|
+
if @lang == 'fr' || @lang == :fr
|
89
|
+
chapters[:fr]
|
90
|
+
else
|
91
|
+
chapters[:de]
|
92
|
+
end
|
40
93
|
end
|
94
|
+
private
|
41
95
|
def escape_id(text)
|
42
|
-
CGI.escape(text.
|
96
|
+
CGI.escape(text.
|
97
|
+
gsub(/&(.)uml;/, '\1e').gsub(/'/, '').gsub(/&(eacute|agrave);/, 'e').
|
98
|
+
gsub(/\s*\/\s*|\s+|\/|\-/, '_').gsub(/\./, '').downcase)
|
43
99
|
end
|
44
|
-
def parse_code(text) # swissmedic
|
45
|
-
if text =~
|
46
|
-
|
47
|
-
elsif text =~ /^\s*(\d{5})(.*|\s*)\s*\(\s*Swiss\s*medic\s*\)(\s*|.)$/iu
|
100
|
+
def parse_code(text) # swissmedic number
|
101
|
+
if text.gsub(@@figure_pattern, '') =~
|
102
|
+
/^\s*(\d{5})(.*|\s*)\s*\(\s*Swiss\s*medic\s*\)(\s*|.)$/iu
|
48
103
|
@code = "%5d" % $1
|
49
104
|
else
|
50
105
|
nil
|
@@ -54,18 +109,19 @@ module YDocx
|
|
54
109
|
return markup(:h2, text, {:id => id})
|
55
110
|
end
|
56
111
|
def parse_title(node, text)
|
57
|
-
if @indecies.empty? and !text.empty? and
|
58
|
-
(node.previous.
|
112
|
+
if @indecies.empty? and !text.empty? and node.previous and
|
113
|
+
(node.parent.previous.nil? or node.previous.inner_text.strip.empty?)
|
59
114
|
# The first line as package name
|
60
|
-
@
|
61
|
-
|
115
|
+
title = (@lang == 'fr' ? 'Titre' : 'Titel')
|
116
|
+
@indecies << {:text => title, :id => title.downcase}
|
117
|
+
return markup(:h1, text, {:id => title.downcase})
|
62
118
|
else
|
63
119
|
return nil
|
64
120
|
end
|
65
121
|
end
|
66
122
|
def parse_block(node)
|
67
123
|
text = node.inner_text.strip
|
68
|
-
text =
|
124
|
+
text = character_encode(text)
|
69
125
|
chapters.each_pair do |chapter, regexp|
|
70
126
|
if text =~ regexp
|
71
127
|
# allow without line break
|
@@ -73,13 +129,12 @@ module YDocx
|
|
73
129
|
id = escape_id(chapter)
|
74
130
|
@indecies << {:text => chapter, :id => id}
|
75
131
|
return parse_heading(text, id)
|
76
|
-
elsif parse_code(text)
|
77
|
-
return nil
|
78
132
|
end
|
79
133
|
end
|
80
134
|
if title = parse_title(node, text)
|
81
135
|
return title
|
82
136
|
end
|
137
|
+
parse_code(text)
|
83
138
|
return nil
|
84
139
|
end
|
85
140
|
end
|
@@ -177,11 +232,13 @@ div#container {
|
|
177
232
|
end
|
178
233
|
style.gsub(/\s\s+|\n/, ' ')
|
179
234
|
end
|
180
|
-
def resolve_path(path)
|
235
|
+
def resolve_path(path) # image src
|
181
236
|
if reference = @references.shift
|
182
237
|
File.dirname(path) + '/' + reference.basename.to_s
|
183
|
-
|
238
|
+
elsif @files.to_s =~ /\d{5}/
|
184
239
|
path
|
240
|
+
else
|
241
|
+
@files.join path
|
185
242
|
end
|
186
243
|
end
|
187
244
|
end
|
@@ -202,10 +259,11 @@ div#container {
|
|
202
259
|
end
|
203
260
|
@files
|
204
261
|
end
|
205
|
-
def output_file(ext)
|
262
|
+
def output_file(ext) # html
|
263
|
+
lang = (@parser.lang.downcase == 'fr' ? 'fr' : 'de')
|
206
264
|
if @parser.code
|
207
265
|
filename = @parser.code
|
208
|
-
output_directory.join "#{filename}.#{ext.to_s}"
|
266
|
+
output_directory.join "#{lang}_#{filename}.#{ext.to_s}"
|
209
267
|
else # default
|
210
268
|
@path.sub_ext(".#{ext.to_s}")
|
211
269
|
end
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,87 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.configure do |config|
|
6
|
+
config.expect_with(:rspec) { |c| c.syntax = :should }
|
7
|
+
end
|
8
|
+
|
9
|
+
describe "ydocx" do
|
10
|
+
|
11
|
+
before :all do
|
12
|
+
end
|
13
|
+
|
14
|
+
before :each do
|
15
|
+
Dir.glob("#{YDcoxHelper::DataDir}/*.xml").each { |file| FileUtils.rm_f(file, :verbose => $VERBOSE) }
|
16
|
+
Dir.glob("#{YDcoxHelper::DataDir}/*.html").each { |file| FileUtils.rm_f(file, :verbose => $VERBOSE) }
|
17
|
+
end
|
18
|
+
|
19
|
+
after :each do
|
20
|
+
end
|
21
|
+
|
22
|
+
after :all do
|
23
|
+
Dir.glob("#{YDcoxHelper::DataDir}/*.xml").each { |file| FileUtils.rm_f(file, :verbose => $VERBOSE) }
|
24
|
+
Dir.glob("#{YDcoxHelper::DataDir}/*.html").each { |file| FileUtils.rm_f(file, :verbose => $VERBOSE) }
|
25
|
+
end
|
26
|
+
|
27
|
+
it "should convert sinovial_FR to xml" do
|
28
|
+
require 'ydocx/templates/fachinfo'
|
29
|
+
sinovial_FR = File.join(YDcoxHelper::DataDir, 'Sinovial_FR.docx')
|
30
|
+
File.exists?(sinovial_FR).should be true
|
31
|
+
doc = YDocx::Document.open(sinovial_FR, { :lang => :fr})
|
32
|
+
sinovial_FR_xml = sinovial_FR.sub('.docx', '.xml')
|
33
|
+
doc.to_xml(sinovial_FR_xml, {:format => :fachinfo})
|
34
|
+
out = doc.output_file('xml')
|
35
|
+
File.exists?(sinovial_FR_xml).should be true
|
36
|
+
doc.parser.lang.to_s.should == 'fr'
|
37
|
+
doc = Nokogiri::XML(open(sinovial_FR_xml))
|
38
|
+
doc.xpath('//chapters/chapter[contains(heading, "Fabricant")]').size.should > 0
|
39
|
+
doc.xpath('//chapters/chapter[contains(heading, "Distributeur")]').size.should > 0
|
40
|
+
doc.xpath('//chapters/chapter[contains(heading, "Remarques particulières")]').size.should > 0
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should convert sinovial_DE to xml" do
|
44
|
+
sinovial_DE = File.join(YDcoxHelper::DataDir, 'Sinovial_DE.docx')
|
45
|
+
File.exists?(sinovial_DE).should be true
|
46
|
+
doc = YDocx::Document.open(sinovial_DE)
|
47
|
+
sinovial_DE_xml = sinovial_DE.sub('.docx', '.xml')
|
48
|
+
doc.to_xml(sinovial_DE_xml, {:format => :fachinfo})
|
49
|
+
out = doc.output_file('xml')
|
50
|
+
File.exists?(sinovial_DE_xml).should be true
|
51
|
+
doc.parser.lang.to_s.should == 'de'
|
52
|
+
doc = Nokogiri::XML(open(sinovial_DE_xml))
|
53
|
+
doc.xpath('//chapters/chapter[contains(heading, "Packung")]').size.should > 0
|
54
|
+
doc.xpath('//chapters/chapter[contains(heading, "Hersteller")]').size.should > 0
|
55
|
+
doc.xpath('//chapters/chapter[contains(heading, "Vertriebsfirma")]').size.should > 0
|
56
|
+
end
|
57
|
+
|
58
|
+
it "should convert sinovial_DE to html" do
|
59
|
+
sinovial_DE = File.join(YDcoxHelper::DataDir, 'Sinovial_DE.docx')
|
60
|
+
File.exists?(sinovial_DE).should be true
|
61
|
+
doc = YDocx::Document.open(sinovial_DE)
|
62
|
+
sinovial_DE_html = sinovial_DE.sub('.docx', '.html')
|
63
|
+
doc.to_html(sinovial_DE_html, {:format => :fachinfo})
|
64
|
+
out = doc.output_file('html')
|
65
|
+
File.exists?(sinovial_DE_html).should be true
|
66
|
+
end
|
67
|
+
|
68
|
+
it "should convert various pseudo fachinfo to xml" do
|
69
|
+
require 'ydocx/templates/fachinfo'
|
70
|
+
files = [ 'Sinovial_0.8_DE.docx', 'Sinovial_0.8_FR.docx',
|
71
|
+
'Sinovial_DE.docx', 'Sinovial_FR.docx',
|
72
|
+
]
|
73
|
+
files.each {
|
74
|
+
|file|
|
75
|
+
file_name = File.join(YDcoxHelper::DataDir, file)
|
76
|
+
File.exists?(file_name).should be true
|
77
|
+
file.match('_DE') ? lang = 'de' : lang = 'fr'
|
78
|
+
doc = YDocx::Document.open(file_name, { :lang => lang} )
|
79
|
+
file_name_xml = file_name.sub('.docx', '.xml')
|
80
|
+
doc.to_xml(file_name_xml, {:format => :fachinfo})
|
81
|
+
out = doc.output_file('xml')
|
82
|
+
File.exists?(file_name_xml).should be true
|
83
|
+
doc.parser.lang.should == lang
|
84
|
+
doc = Nokogiri::XML(open(file_name_xml))
|
85
|
+
}
|
86
|
+
end
|
87
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'simplecov'
|
5
|
+
if ENV['COVERAGE']
|
6
|
+
SimpleCov.start
|
7
|
+
end
|
8
|
+
|
9
|
+
require 'fileutils'
|
10
|
+
require 'pp'
|
11
|
+
require 'ydocx'
|
12
|
+
|
13
|
+
module YDcoxHelper
|
14
|
+
DataDir = File.join(File.dirname(__FILE__), '..', 'spec', 'data')
|
15
|
+
end
|
16
|
+
require 'rspec'
|
17
|
+
require 'ydocx'
|
metadata
CHANGED
@@ -1,88 +1,64 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ydocx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.
|
5
|
-
prerelease:
|
4
|
+
version: 1.2.6
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
|
-
- Yasuhiro
|
7
|
+
- "'Yasuhiro Asak, Zeno R.R. Davatz"
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
13
|
-
dependencies:
|
14
|
-
- !ruby/object:Gem::Dependency
|
15
|
-
name: rdoc
|
16
|
-
requirement: &6667440 !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
|
-
requirements:
|
19
|
-
- - ~>
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version: '3.10'
|
22
|
-
type: :development
|
23
|
-
prerelease: false
|
24
|
-
version_requirements: *6667440
|
25
|
-
- !ruby/object:Gem::Dependency
|
26
|
-
name: hoe
|
27
|
-
requirement: &6667000 !ruby/object:Gem::Requirement
|
28
|
-
none: false
|
29
|
-
requirements:
|
30
|
-
- - ~>
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
version: '2.13'
|
33
|
-
type: :development
|
34
|
-
prerelease: false
|
35
|
-
version_requirements: *6667000
|
11
|
+
date: 2021-01-20 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
36
13
|
description: ''
|
37
|
-
email:
|
38
|
-
- yasaka@ywesee.com, zdavatz@ywesee.com
|
14
|
+
email: yasaka@ywesee.com, zdavatz@ywesee.com
|
39
15
|
executables:
|
40
|
-
- docx2html
|
41
16
|
- docx2xml
|
17
|
+
- docx2html
|
42
18
|
extensions: []
|
43
|
-
extra_rdoc_files:
|
44
|
-
- History.txt
|
45
|
-
- Manifest.txt
|
46
|
-
- README.txt
|
19
|
+
extra_rdoc_files: []
|
47
20
|
files:
|
48
21
|
- History.txt
|
49
22
|
- Manifest.txt
|
50
23
|
- README.txt
|
51
|
-
- Rakefile
|
52
24
|
- bin/docx2html
|
53
25
|
- bin/docx2xml
|
54
26
|
- lib/ydocx.rb
|
55
27
|
- lib/ydocx/builder.rb
|
28
|
+
- lib/ydocx/command.rb
|
56
29
|
- lib/ydocx/document.rb
|
57
30
|
- lib/ydocx/markup_method.rb
|
58
31
|
- lib/ydocx/parser.rb
|
59
|
-
- lib/ydocx/command.rb
|
60
32
|
- lib/ydocx/templates/fachinfo.rb
|
61
33
|
- lib/ydocx/templates/patinfo.rb
|
62
|
-
|
63
|
-
|
34
|
+
- lib/ydocx/version.rb
|
35
|
+
- spec/data/Sinovial_0.8_DE.docx
|
36
|
+
- spec/data/Sinovial_0.8_FR.docx
|
37
|
+
- spec/data/Sinovial_DE.docx
|
38
|
+
- spec/data/Sinovial_FR.docx
|
39
|
+
- spec/lib/sinovial_spec.rb
|
40
|
+
- spec/spec_helper.rb
|
41
|
+
homepage: https://github.com/zdavatz/ydocx/
|
42
|
+
licenses:
|
43
|
+
- GPL-3.0
|
44
|
+
metadata: {}
|
64
45
|
post_install_message:
|
65
|
-
rdoc_options:
|
66
|
-
- --main
|
67
|
-
- README.txt
|
46
|
+
rdoc_options: []
|
68
47
|
require_paths:
|
69
48
|
- lib
|
70
49
|
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
-
none: false
|
72
50
|
requirements:
|
73
|
-
- -
|
51
|
+
- - ">="
|
74
52
|
- !ruby/object:Gem::Version
|
75
53
|
version: '0'
|
76
54
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
-
none: false
|
78
55
|
requirements:
|
79
|
-
- -
|
56
|
+
- - ">="
|
80
57
|
- !ruby/object:Gem::Version
|
81
58
|
version: '0'
|
82
59
|
requirements: []
|
83
|
-
|
84
|
-
rubygems_version: 1.8.15
|
60
|
+
rubygems_version: 3.1.2
|
85
61
|
signing_key:
|
86
|
-
specification_version:
|
87
|
-
summary:
|
62
|
+
specification_version: 4
|
63
|
+
summary: Parsing docx files with Ruby and output them as HTML and XML
|
88
64
|
test_files: []
|
data/Rakefile
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
# -*- ruby -*-
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'hoe'
|
5
|
-
|
6
|
-
# Hoe.plugin :compiler
|
7
|
-
# Hoe.plugin :gem_prelude_sucks
|
8
|
-
# Hoe.plugin :inline
|
9
|
-
# Hoe.plugin :minitest
|
10
|
-
# Hoe.plugin :racc
|
11
|
-
# Hoe.plugin :rubyforge
|
12
|
-
|
13
|
-
Hoe.spec 'ydocx' do
|
14
|
-
|
15
|
-
developer('Yasuhiro Asaka, Zeno R.R. Davatz', 'yasaka@ywesee.com, zdavatz@ywesee.com')
|
16
|
-
|
17
|
-
end
|