ydocx 1.0.4 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,13 @@
1
+ === 1.0.5 / 03.05.2012
2
+
3
+ * Added img support
4
+ - without wmf conversion
5
+ - create ***_files directory in the same directory as the html file
6
+ * Improved chapter regexp for patinfo
7
+ * Updated command option: fachinfo format is now the default
8
+ * Improved regexp for patinfo
9
+ * Added Patinfo format
10
+
1
11
  === 1.0.4 / 02.05.2012
2
12
 
3
13
  * Fixed bug for action to_xml in command
data/Manifest.txt CHANGED
@@ -12,3 +12,4 @@ lib/ydocx/markup_method.rb
12
12
  lib/ydocx/parser.rb
13
13
  lib/ydocx/command.rb
14
14
  lib/ydocx/templates/fachinfo.rb
15
+ lib/ydocx/templates/patinfo.rb
data/README.txt CHANGED
@@ -11,6 +11,12 @@
11
11
  * Umlaute
12
12
  * bold, italic, underline
13
13
 
14
+ == Usage
15
+
16
+ * Usage: /usr/local/bin/docx2html file [options]
17
+ -f, --format Format of style and chapter {(fi|fachinfo)|(pi|patinfo)|(pl|plain)|none}, default fachinfo.
18
+ -h, --help Display this help message.
19
+
14
20
  == Using the great libraries
15
21
 
16
22
  * nokogiri
data/lib/version.rb CHANGED
@@ -2,5 +2,5 @@
2
2
  # encoding: utf-8
3
3
 
4
4
  module Docx2html
5
- VERSION = "1.0.4"
5
+ VERSION = "1.0.5"
6
6
  end
data/lib/ydocx/builder.rb CHANGED
@@ -2,17 +2,19 @@
2
2
  # encoding: utf-8
3
3
 
4
4
  require 'nokogiri'
5
+ require 'pathname'
5
6
  require 'ydocx/markup_method'
6
7
 
7
8
  module YDocx
8
9
  class Builder
9
10
  include MarkupMethod
10
11
  attr_accessor :contents, :container, :indecies,
11
- :style, :title
12
+ :files, :style, :title
12
13
  def initialize(contents)
13
14
  @contents = contents
14
15
  @container = {}
15
16
  @indecies = []
17
+ @files = Pathname.new('.')
16
18
  @style = false
17
19
  @title = ''
18
20
  init
@@ -47,10 +49,10 @@ module YDocx
47
49
  builder.to_html.gsub(/\n/, '')
48
50
  end
49
51
  def build_xml
50
- chapters = compile(@contents, :xml)
52
+ paragraphs = compile(@contents, :xml)
51
53
  builder = Nokogiri::XML::Builder.new do |xml|
52
54
  xml.document {
53
- xml.chapters { xml << chapters }
55
+ xml.paragraphs { xml << paragraphs }
54
56
  }
55
57
  end
56
58
  builder.to_xml(:indent => 0, :encoding => 'utf-8').gsub(/\n/, '')
@@ -87,7 +89,7 @@ module YDocx
87
89
  if tag == :br and mode != :xml
88
90
  return "<br/>"
89
91
  elsif content.nil? or content.empty?
90
- return ''
92
+ return '' if attributes.nil? # without img
91
93
  end
92
94
  _content = ''
93
95
  if content.is_a? Array
@@ -109,7 +111,12 @@ module YDocx
109
111
  unless attributes.empty?
110
112
  attributes.each_pair do |key, value|
111
113
  next if mode == :xml and key.to_s =~ /(id|style|colspan)/u
112
- _attributes << " #{key.to_s}=#{value.to_s}"
114
+ if tag == :img and key == :src
115
+ src = @files.join value.to_s
116
+ _attributes << " src=\"#{src}\""
117
+ else
118
+ _attributes << " #{key.to_s}=\"#{value.to_s}\""
119
+ end
113
120
  end
114
121
  end
115
122
  if mode == :xml
@@ -123,7 +130,11 @@ module YDocx
123
130
  when :sub then _tag = 'subscript' # text
124
131
  end
125
132
  end
126
- return "<#{_tag}#{_attributes}>#{_content}</#{_tag}>"
133
+ if tag == :img
134
+ return "<#{_tag}#{_attributes}/>"
135
+ else
136
+ return "<#{_tag}#{_attributes}>#{_content}</#{_tag}>"
137
+ end
127
138
  end
128
139
  def style
129
140
  style = <<-CSS
data/lib/ydocx/command.rb CHANGED
@@ -19,7 +19,7 @@ module YDocx
19
19
  def help
20
20
  banner = <<-BANNER
21
21
  Usage: #{$0} file [options]
22
- -f, --format Format of style and chapter {fi|fachinfo}, default none.
22
+ -f, --format Format of style and chapter {(fi|fachinfo)|(pl|plain)|none}, default fachinfo.
23
23
  -h, --help Display this help message.
24
24
  BANNER
25
25
  puts banner
@@ -50,10 +50,14 @@ Usage: #{$0} file [options]
50
50
  case argv[0]
51
51
  when 'fi', 'fachinfo'
52
52
  require 'ydocx/templates/fachinfo'
53
- # TODO style option?
54
53
  options.merge!({:style => :frame}) if action == :to_html
55
54
  when 'pi', 'patinfo'
56
- # pending
55
+ require 'ydocx/templates/patinfo'
56
+ options.merge!({:style => :frame}) if action == :to_html
57
+ when 'pl', 'plain'
58
+ options.merge!({:style => true}) if action == :to_html
59
+ when 'none'
60
+ # pass
57
61
  else
58
62
  self.error "#{self.command}: exit with #{option}: Invalid argument"
59
63
  end
@@ -62,6 +66,10 @@ Usage: #{$0} file [options]
62
66
  else
63
67
  self.error "#{self.command}: exit with #{option}: Unknown option"
64
68
  end
69
+ else
70
+ # default fachinfo
71
+ require 'ydocx/templates/fachinfo'
72
+ options.merge!({:style => :frame}) if action == :to_html
65
73
  end
66
74
  YDocx::Document.open(path).send(action, path, options)
67
75
  self.report action, path
@@ -8,19 +8,25 @@ require 'ydocx/builder'
8
8
 
9
9
  module YDocx
10
10
  class Document
11
- attr_reader :contents, :indecies
11
+ attr_reader :contents, :indecies, :pictures
12
12
  def self.open(file)
13
13
  self.new(file)
14
14
  end
15
15
  def initialize(file)
16
16
  @contents = nil
17
17
  @indecies = nil
18
+ @pictures = []
19
+ @path = nil
20
+ @files = nil
21
+ @zip = nil
18
22
  read(file)
19
23
  end
20
24
  def to_html(file='', options={})
21
25
  html = ''
26
+ @files = @path.dirname.join(@path.basename('.docx').to_s + '_files')
22
27
  Builder.new(@contents) do |builder|
23
- builder.title = @path
28
+ builder.title = @path.basename
29
+ builder.files = @files
24
30
  builder.style = options[:style] if options.has_key?(:style)
25
31
  if @indecies
26
32
  builder.indecies = @indecies
@@ -28,8 +34,9 @@ module YDocx
28
34
  html = builder.build_html
29
35
  end
30
36
  unless file.empty?
31
- path = Pathname.new(file).realpath.sub_ext('.html')
32
- File.open(path, 'w:utf-8') do |f|
37
+ create_files if has_picture?
38
+ html_file = @path.sub_ext('.html')
39
+ File.open(html_file, 'w:utf-8') do |f|
33
40
  f.puts html
34
41
  end
35
42
  else
@@ -42,8 +49,8 @@ module YDocx
42
49
  xml = builder.build_xml
43
50
  end
44
51
  unless file.empty?
45
- path = Pathname.new(file).realpath.sub_ext('.xml')
46
- File.open(path, 'w:utf-8') do |f|
52
+ xml_file = @path.sub_ext('.xml')
53
+ File.open(xml_file, 'w:utf-8') do |f|
47
54
  f.puts xml
48
55
  end
49
56
  else
@@ -51,13 +58,32 @@ module YDocx
51
58
  end
52
59
  end
53
60
  private
61
+ def has_picture?
62
+ !@pictures.empty?
63
+ end
64
+ def create_files
65
+ FileUtils.mkdir @files unless @files.exist?
66
+ @zip = Zip::ZipFile.open(@path.realpath)
67
+ @pictures.each do |pic|
68
+ pic_path = Pathname.new pic # id/filename.ext
69
+ pic_dir = @files.join pic_path.dirname
70
+ FileUtils.mkdir pic_dir unless pic_dir.exist?
71
+ binary = @zip.find_entry("word/media/#{pic_path.basename}").get_input_stream
72
+ @files.join(pic_path).open('w') do |f|
73
+ f.puts binary.read
74
+ end
75
+ end
76
+ @zip.close
77
+ end
54
78
  def read(file)
55
- @path = File.expand_path(file)
56
- @zip = Zip::ZipFile.open(@path)
57
- stream = @zip.find_entry('word/document.xml').get_input_stream
58
- Parser.new(stream) do |parser|
79
+ @path = Pathname.new file
80
+ @zip = Zip::ZipFile.open(@path.realpath)
81
+ doc = @zip.find_entry('word/document.xml').get_input_stream
82
+ ref = @zip.find_entry('word/_rels/document.xml.rels').get_input_stream
83
+ Parser.new(doc, ref) do |parser|
59
84
  @contents = parser.parse
60
85
  @indecies = parser.indecies
86
+ @pictures = parser.pictures
61
87
  end
62
88
  @zip.close
63
89
  end
data/lib/ydocx/parser.rb CHANGED
@@ -8,11 +8,13 @@ require 'ydocx/markup_method'
8
8
  module YDocx
9
9
  class Parser
10
10
  include MarkupMethod
11
- attr_accessor :indecies, :result, :space
12
- def initialize(stream)
13
- @xml = Nokogiri::XML.parse(stream)
11
+ attr_accessor :indecies, :pictures, :result, :space
12
+ def initialize(doc, rel)
13
+ @doc = Nokogiri::XML.parse(doc)
14
+ @rel = Nokogiri::XML.parse(rel)
14
15
  @coder = HTMLEntities.new
15
16
  @indecies = []
17
+ @pictures = []
16
18
  @result = []
17
19
  @space = '&nbsp;'
18
20
  init
@@ -23,13 +25,13 @@ module YDocx
23
25
  def init
24
26
  end
25
27
  def parse
26
- @xml.xpath('//w:document//w:body').children.map do |node|
28
+ @doc.xpath('//w:document//w:body').children.map do |node|
27
29
  case node.node_name
28
30
  when 'text'
29
31
  @result << parse_paragraph(node)
30
32
  when 'tbl'
31
33
  @result << parse_table(node)
32
- when 'image'
34
+ when 'pict'
33
35
  # pending
34
36
  when 'p'
35
37
  @result << parse_paragraph(node)
@@ -82,7 +84,6 @@ module YDocx
82
84
  nil # default no block element
83
85
  end
84
86
  def optional_escape(text)
85
- return text = @space if text.empty?
86
87
  text.force_encoding('utf-8')
87
88
  # NOTE
88
89
  # :named only for escape at Builder
@@ -149,8 +150,22 @@ module YDocx
149
150
  #p "char : " + @coder.decode("&#%s;" % code.hex.to_s)
150
151
  end
151
152
  end
152
- def parse_image
153
- # pending
153
+ def parse_image(r)
154
+ if pict = r.xpath('w:pict') and
155
+ shape = pict.xpath('v:shape') and
156
+ image = shape.xpath('v:imagedata')
157
+ id = image.first['id'] # r:id
158
+ @rel.xpath('/').children.each do |element|
159
+ element.children.each do |rel|
160
+ if rel['Id'] == id and rel['Target']
161
+ src = id.downcase + '/' + File.basename(rel['Target'])
162
+ @pictures << src
163
+ return markup :img, [], {:src => src}
164
+ end
165
+ end
166
+ end
167
+ end
168
+ nil
154
169
  end
155
170
  def parse_paragraph(node)
156
171
  content = []
@@ -158,12 +173,12 @@ module YDocx
158
173
  pos = 0
159
174
  node.xpath('w:r').each do |r|
160
175
  unless r.xpath('w:t').empty?
161
- content << parse_text(r)
176
+ content << parse_text(r, (pos == 0)) # rm indent
162
177
  pos += 1
163
178
  else
164
179
  unless r.xpath('w:tab').empty?
165
180
  if content.last != @space and pos != 0 # ignore tab at line head
166
- content << optional_escape('')
181
+ content << @space
167
182
  pos += 1
168
183
  end
169
184
  end
@@ -172,6 +187,9 @@ module YDocx
172
187
  content << optional_replace(code)
173
188
  pos += 1
174
189
  end
190
+ unless r.xpath('w:pict').empty?
191
+ content << parse_image(r)
192
+ end
175
193
  end
176
194
  end
177
195
  content.compact!
@@ -209,7 +227,7 @@ module YDocx
209
227
  end
210
228
  table
211
229
  end
212
- def parse_text(r)
230
+ def parse_text(r, lstrip=false)
213
231
  text = r.xpath('w:t').map(&:text).join('')
214
232
  text = optional_escape(text)
215
233
  if rpr = r.xpath('w:rPr')
@@ -218,7 +236,7 @@ module YDocx
218
236
  block
219
237
  else
220
238
  # inline tag
221
- text = text.strip
239
+ text = text.lstrip if lstrip
222
240
  text = apply_align(rpr, text)
223
241
  unless rpr.xpath('w:u').empty?
224
242
  text = markup(:span, text, {:style => "text-decoration:underline;"})
@@ -6,6 +6,9 @@ require 'cgi'
6
6
  module YDocx
7
7
  class Parser
8
8
  private
9
+ def escape_as_id(text)
10
+ CGI.escape(text.gsub(/&(.)uml;/, '\1').gsub(/\s*\/\s*|\/|\s+/, '_').gsub(/(\?|_$)/, '').downcase)
11
+ end
9
12
  def parse_as_block(r, text)
10
13
  text = text.strip
11
14
  # TODO
@@ -17,7 +20,7 @@ module YDocx
17
20
  'Ind./Anw.m&ouml;gl.' => /^Indikationen(\s+|\s*(\/|und)\s*)Anwendungsm&ouml;glichkeiten$|^Indications/u, # 4
18
21
  'Interakt.' => /^Interaktionen$|^Interactions/u, # 8
19
22
  'Kontraind.' => /^Kontraindikationen($|\s*\(\s*absolute\s+Kontraindikationen\s*\)$)/u, # 6
20
- 'Name' => /^Name\s+des\s+Pr&auml;parates$/, # 1
23
+ 'Name' => /^Name\s+des\s+Pr&auml;parates$/u, # 1
21
24
  'Packungen' => /^Packungen($|\s*\(\s*mit\s+Angabe\s+der\s+Abgabekategorie\s*\)$)/u, # 18
22
25
  'Pr&auml;klin.' => /^Pr&auml;klinische\s+Daten$/u, # 15
23
26
  'Pharm.kinetik' => /^Pharmakokinetik($|\s*\((Absorption,\s*Distribution,\s*Metabolisms,\s*Elimination\s|Kinetik\s+spezieller\s+Patientengruppen)*\)$)|^Pharmacocin.tique?/iu, # 14
@@ -35,7 +38,7 @@ module YDocx
35
38
  if text =~ regexp
36
39
  next if !r.next.nil? and # skip matches in paragraph
37
40
  r.next.name.downcase != 'bookmarkend'
38
- id = CGI.escape(text.gsub(/&(.)uml;/, '\1').gsub(/\s*\/\s*|\/|\s+/, '_').downcase)
41
+ id = escape_as_id(text)
39
42
  @indecies << {:text => chapter, :id => id}
40
43
  return markup(:h3, text, {:id => id})
41
44
  end
@@ -52,6 +55,15 @@ module YDocx
52
55
  def init
53
56
  @container = markup(:div, [], {:id => 'container'})
54
57
  end
58
+ def build_xml
59
+ chapters = compile(@contents, :xml)
60
+ builder = Nokogiri::XML::Builder.new do |xml|
61
+ xml.document {
62
+ xml.chapters { xml << chapters }
63
+ }
64
+ end
65
+ builder.to_xml(:indent => 0, :encoding => 'utf-8').gsub(/\n/, '')
66
+ end
55
67
  private
56
68
  def build_before_content
57
69
  if @indecies
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'ydocx/templates/fachinfo'
5
+
6
+ module YDocx
7
+ class Parser
8
+ private
9
+ def parse_as_block(r, text)
10
+ text = text.strip
11
+ # TODO
12
+ # Franzoesisch
13
+ chapters = {
14
+ 'Ab&auml;nderung' => /^Was\s+sollte\s+dazu\s+beachtet\s+werden\s*\??$/u, # 4
15
+ 'Dos./Anw.' => /^Wie\s+verwenden\s+Sie\s+\w+\s*\??$/u, # 8
16
+ 'Eigensch.' => /^Was\s+ist\s+\w+\s+und\s+wann\s+wird\s+es\s+angewendet\s*\??$/u, # 3
17
+ 'Gew&ouml;hnliche H.' => /^Was\s+ist\s+ferner\s+zu\s+beachten\s*\??$/u, # 10
18
+ 'Hersteller' => /^Herstellerin$/u, # 15
19
+ 'Information' => /^Information\s+f&uuml;r\sPatientinnen\s+und\s+Patienten$/u, # 1
20
+ 'Kontraind.' => /^Wann\s+darf\s+\w+\s+nicht\s+(eingenommen\s*\/\s*angewendet|eingenommen|angewendet)\s*werden\s*\??$/u, # 5
21
+ 'Name' => /^Name\s+des\s+Pr&auml;parates$/u, # 2
22
+ 'Packungen' => /^Wo\s+erhalten\s+Sie\s+\w+\s*\?\s*Welche\s+Packungen\s+sind\s+erh&auml;ltlich\s*\??$/u, # 13
23
+ 'Schwanderschaft' => /^Darf\s+\w+\s+w&auml;hrend\s+einer\s+Schwangerschaft\s+oder\s+in\s+der\s+Stillzeit\s+(eingenommen\s*\/\s*angewendet|eingenommen|angewendet)\s*werden\s*\??$/u, # 7
24
+ 'Stand d. Info.' => /^Diese\sPackungsbeilage\s+wurde\s+im\s+[\.A-z\s0-9]+(\s+|\s*\/\s*\w+\s+\(Monat\s*\/\s*Jahr\)\s*)letztmals\s+durch\s+die\s+Arzneimittelbeh&ouml;rde\s*\(\s*Swissmedic\s*\)\s*gepr&uuml;ft.?$/u, # 16
25
+ 'Swissmedic-Nr.' => /^Zulassungsnummer$/u, # 12
26
+ 'Unerw.Wirkungen' => /^Welche\s+Nebenwirkungen\s+kann\s+\w+\s+haben\s*\??$/u, # 9
27
+ 'Verteiler' => /^Zulassungsinhaberin$/u, # 14
28
+ 'Vorbeugung' => /^Wann\s+ist\s+bei\s+der\s+(Einnahme\s*\/\s*Anwendung|Einnahme|Anwendung)\s*von\s+\w+\s+Vorsicht\s+geboten\s*\??$/u, # 6
29
+ 'Zusammens.' => /^Was\s+ist\s+in\s+\w+\s+enthalten\s*\??$/u, # 11
30
+ }.each_pair do |chapter, regexp|
31
+ if text =~ regexp
32
+ next if !r.next.nil? and # skip matches in paragraph
33
+ r.next.name.downcase != 'bookmarkend'
34
+ id = escape_as_id(text)
35
+ @indecies << {:text => chapter, :id => id}
36
+ return markup(:h3, text, {:id => id})
37
+ end
38
+ end
39
+ if r.parent.previous.nil? and @indecies.empty?
40
+ # The first line as package name
41
+ @indecies << {:text => 'Titel', :id => 'titel'}
42
+ return markup(:h2, text, {:id => 'titel'})
43
+ end
44
+ return nil
45
+ end
46
+ end
47
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ydocx
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-02 00:00:00.000000000 Z
12
+ date: 2012-05-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rdoc
16
- requirement: &19114300 !ruby/object:Gem::Requirement
16
+ requirement: &9555940 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '3.10'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *19114300
24
+ version_requirements: *9555940
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: hoe
27
- requirement: &19113780 !ruby/object:Gem::Requirement
27
+ requirement: &9555520 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: '2.13'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *19113780
35
+ version_requirements: *9555520
36
36
  description: ''
37
37
  email:
38
38
  - yasaka@ywesee.com, zdavatz@ywesee.com
@@ -59,6 +59,7 @@ files:
59
59
  - lib/ydocx/parser.rb
60
60
  - lib/ydocx/command.rb
61
61
  - lib/ydocx/templates/fachinfo.rb
62
+ - lib/ydocx/templates/patinfo.rb
62
63
  homepage: https://github.com/zdavatz/ydocx
63
64
  licenses: []
64
65
  post_install_message: