parchment 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.md +22 -0
- data/Parchment.rdoc +4 -0
- data/README.md +41 -0
- data/Rakefile +25 -0
- data/lib/parchment.rb +27 -0
- data/lib/parchment/document.rb +77 -0
- data/lib/parchment/formats/docx/document.rb +30 -0
- data/lib/parchment/formats/docx/docx.rb +17 -0
- data/lib/parchment/formats/docx/paragraph.rb +51 -0
- data/lib/parchment/formats/docx/style.rb +48 -0
- data/lib/parchment/formats/docx/text_run.rb +35 -0
- data/lib/parchment/formats/odt/document.rb +32 -0
- data/lib/parchment/formats/odt/odt.rb +17 -0
- data/lib/parchment/formats/odt/paragraph.rb +26 -0
- data/lib/parchment/formats/odt/style.rb +49 -0
- data/lib/parchment/formats/odt/text_run.rb +18 -0
- data/lib/parchment/helpers.rb +36 -0
- data/lib/parchment/paragraph.rb +72 -0
- data/lib/parchment/style.rb +61 -0
- data/lib/parchment/text_run.rb +57 -0
- data/lib/parchment/version.rb +3 -0
- metadata +136 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 0d6c2118023a45879f6d70a6a9504da8af5e2942
|
4
|
+
data.tar.gz: a2a64bf4e2d264dd56f003a066fa027df6b0c911
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 92165f1b6633f8ae88545b415b5c53dddb532d0a9f9f84860dbc1e64cc381d84c2ac520da87e10a6a09d6e361dbb0b38c052f60ab6f7a3f1b2ae31590ffb79b5
|
7
|
+
data.tar.gz: d8d175ba3eff1d82cb5bf45680bfd498e1d7b362fb021074fae9b166f904bec55d91430e31cbbbdf8b2064ab0d7e27e0537f2e535adee0659cf5c4881735920b
|
data/LICENSE.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Allen Petlock
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Parchment.rdoc
ADDED
data/README.md
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Parchment
|
2
|
+
|
3
|
+
Parchment is a simple, flexible library for interacting with word processing
|
4
|
+
document files. Initially intended for outputting .docx and .odt files as
|
5
|
+
HTML fragments, it is built to be flexible for future use.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
gem 'parchment'
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install parchment
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
require 'parchment'
|
24
|
+
doc = Parchment.read('path/to/file.odt')
|
25
|
+
doc.to_html
|
26
|
+
|
27
|
+
## Contributing
|
28
|
+
|
29
|
+
1. Fork it ( https://github.com/apetlock/parchment/fork )
|
30
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
31
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
32
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
33
|
+
5. Create a new Pull Request
|
34
|
+
|
35
|
+
### RSpec tests
|
36
|
+
|
37
|
+
I love TDD/BDD, but not to an insane level. Any tests written should test
|
38
|
+
input and output. Did the file get read properly? Is it outputting properly?
|
39
|
+
Internals do not need to be tested. For example, testing if a paragraph is
|
40
|
+
aligned_right? is necessary. Making sure that the class stores the alignment
|
41
|
+
as String 'right' is not.
|
data/Rakefile
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rspec/core/rake_task'
|
3
|
+
require 'rdoc/task'
|
4
|
+
|
5
|
+
desc 'Run RSpec tests'
|
6
|
+
RSpec::Core::RakeTask.new :spec
|
7
|
+
task test: :spec
|
8
|
+
|
9
|
+
desc 'Generate RDoc documentation'
|
10
|
+
RDoc::Task.new :doc do |rdoc|
|
11
|
+
rdoc.rdoc_files.include('Parchment.rdoc', 'lib/**/*.rb')
|
12
|
+
rdoc.main = 'Parchment.rdoc'
|
13
|
+
rdoc.rdoc_dir = 'doc'
|
14
|
+
rdoc.options << '--force-update'
|
15
|
+
end
|
16
|
+
|
17
|
+
desc 'Launch library console'
|
18
|
+
task :console do
|
19
|
+
sh 'irb -I lib -r parchment'
|
20
|
+
end
|
21
|
+
|
22
|
+
desc 'description'
|
23
|
+
task :default do
|
24
|
+
puts 'Available commands: test, doc, console'
|
25
|
+
end
|
data/lib/parchment.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'parchment/document'
|
2
|
+
require 'parchment/formats/odt/odt'
|
3
|
+
require 'parchment/formats/docx/docx'
|
4
|
+
|
5
|
+
module Parchment
|
6
|
+
|
7
|
+
# Reads +path+ and determines which format module to use for reading
# the file.
#
# path:: (String) Location of the .odt or .docx file.
#
# The extension is taken from the file name only and compared without
# regard to case, so 'REPORT.DOCX' is handled like 'report.docx'.
#
# Raises UnsupportedFileFormatError when the extension is neither
# 'odt' nor 'docx'.
#
def self.read(path)
  extension = File.basename(path.to_s).split('.').last.to_s.downcase
  case extension
  when 'odt'
    Parchment::ODT.read(path)
  when 'docx'
    Parchment::DOCX.read(path)
  else
    raise UnsupportedFileFormatError, 'File format is not supported.'
  end
end
|
21
|
+
|
22
|
+
class UnsupportedFileFormatError < LoadError
|
23
|
+
end
|
24
|
+
|
25
|
+
class MissingFormatterMethodError < NotImplementedError
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'parchment/paragraph'
|
3
|
+
require 'parchment/style'
|
4
|
+
|
5
|
+
module Parchment
|
6
|
+
|
7
|
+
# = Primary Document class.
|
8
|
+
#
|
9
|
+
# A Document is the primary "container" for everything necessary to format
|
10
|
+
# and display its contents. Holds Paragraph and Style objects.
|
11
|
+
#
|
12
|
+
class Document
|
13
|
+
|
14
|
+
# (Array) All the Styles that belong to the Document that other objects
|
15
|
+
# (i.e. Paragraphs, TextRuns) can reference and apply.
|
16
|
+
attr_reader :styles
|
17
|
+
|
18
|
+
# (Array) Paragraph objects that belong to the Document.
|
19
|
+
attr_reader :paragraphs
|
20
|
+
|
21
|
+
# (Style) A Style referenced when style attributes on the child are
|
22
|
+
# not available.
|
23
|
+
attr_reader :default_paragraph_style
|
24
|
+
|
25
|
+
# content_file:: (File) The primary content file of the document.
|
26
|
+
# styles_file:: (File) The styles file from the document.
|
27
|
+
#
|
28
|
+
def initialize(content_file, styles_file)
|
29
|
+
@content_xml = Nokogiri::XML(content_file)
|
30
|
+
@styles_xml = Nokogiri::XML(styles_file)
|
31
|
+
set_styles
|
32
|
+
set_paragraphs
|
33
|
+
end
|
34
|
+
|
35
|
+
# Returns the Style based on the +id+ given.
#
# The XML document formats in particular store their styles in elements
# with unique identifiers. OpenOffice uses these extensively, relying
# on them to specify formatting for the text. DOCX, not so much, but
# it does have user-defined styles.
#
#--
# DOCX styles are not implemented yet.
#++
#
# id:: (String) The unique identifier of the Style.
#
# Returns the first Style whose id equals +id+, or nil when none matches.
#
def get_style_by_id(id)
  # find stops at the first match instead of building an intermediate Array.
  styles.find { |style| id == style.id }
end
|
51
|
+
|
52
|
+
# Output entire document as a HTML fragment String.
#
# Each Paragraph is rendered with its own to_html and the results are
# separated by a newline character.
#
def to_html
  # Double quotes are required here: '\n' in single quotes is a literal
  # backslash followed by "n", not a line break.
  paragraphs.map(&:to_html).join("\n")
end
|
57
|
+
|
58
|
+
private
|
59
|
+
|
60
|
+
# These methods add the Document's children and default settings.
|
61
|
+
#
|
62
|
+
# These methods should be defined in the appropriate class in the
|
63
|
+
# formatter module. i.e. Parchment::ODT::Document will have these.
|
64
|
+
#
|
65
|
+
def set_paragraphs
|
66
|
+
raise MissingFormatterMethodError
|
67
|
+
end
|
68
|
+
|
69
|
+
def set_styles
|
70
|
+
raise MissingFormatterMethodError
|
71
|
+
end
|
72
|
+
|
73
|
+
def set_default_paragraph_style
|
74
|
+
raise MissingFormatterMethodError
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'parchment/document'
|
2
|
+
require_relative 'paragraph'
|
3
|
+
require_relative 'style'
|
4
|
+
|
5
|
+
module Parchment
|
6
|
+
module DOCX
|
7
|
+
class Document < Parchment::Document
|
8
|
+
|
9
|
+
private
|
10
|
+
|
11
|
+
# These methods parse and add the Document's children and defaults.
|
12
|
+
#
|
13
|
+
def set_paragraphs
|
14
|
+
set_default_paragraph_style
|
15
|
+
paragraph_nodes = @content_xml.xpath('.//w:document//w:body//w:p')
|
16
|
+
@paragraphs = paragraph_nodes.map { |node| Parchment::DOCX::Paragraph.new(node, self) }
|
17
|
+
end
|
18
|
+
|
19
|
+
def set_styles
|
20
|
+
style_nodes = @styles_xml.xpath('.//w:style')
|
21
|
+
@styles = style_nodes.map { |node| Parchment::DOCX::Style.new_from_node(node) }
|
22
|
+
end
|
23
|
+
|
24
|
+
def set_default_paragraph_style
|
25
|
+
doc_style_node = @styles_xml.xpath('.//w:docDefaults').first
|
26
|
+
@default_paragraph_style = Parchment::DOCX::Style.new_default_style(doc_style_node)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'zip'
|
2
|
+
require_relative 'document'
|
3
|
+
|
4
|
+
module Parchment
|
5
|
+
|
6
|
+
# = Parchment OfficeOpen (.docx) format parser
|
7
|
+
#
|
8
|
+
module DOCX
|
9
|
+
|
10
|
+
# Opens the .docx archive at +path+ and builds a Document from it.
#
# path:: (String) Location of the .docx file.
#
# The two XML parts are read inside the block form of Zip::File.open so
# that the archive handle is closed again once they have been read,
# rather than being left open for the life of the process.
#
def self.read(path)
  document_file = nil
  styles_file = nil
  Zip::File.open(path) do |zip|
    document_file = zip.read('word/document.xml')
    styles_file = zip.read('word/styles.xml')
  end
  Parchment::DOCX::Document.new(document_file, styles_file)
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'parchment/paragraph'
|
2
|
+
require_relative 'style'
|
3
|
+
require_relative 'text_run'
|
4
|
+
|
5
|
+
module Parchment
|
6
|
+
module DOCX
|
7
|
+
class Paragraph < Parchment::Paragraph
|
8
|
+
|
9
|
+
def initialize(node, document)
|
10
|
+
@node = node
|
11
|
+
@style_id = nil
|
12
|
+
@document = document
|
13
|
+
set_style
|
14
|
+
super()
|
15
|
+
end
|
16
|
+
|
17
|
+
private
|
18
|
+
|
19
|
+
# Because OfficeOpen puts all formatting on the individual
|
20
|
+
# elements rather than refer to a defined style, they need
|
21
|
+
# to be created from the element itself.
|
22
|
+
#
|
23
|
+
def set_style
|
24
|
+
@style = Style.new
|
25
|
+
|
26
|
+
alignment_node = @node.xpath('.//w:jc').first
|
27
|
+
alignment = alignment_node ? alignment_node.attributes['val'].value : nil
|
28
|
+
@style.instance_variable_set('@text_align', alignment.to_sym) if alignment
|
29
|
+
|
30
|
+
size_node = @node.xpath('w:pPr//w:sz').first
|
31
|
+
font_size = size_node ? size_node.attributes['val'].value.to_i / 2 : nil
|
32
|
+
@style.instance_variable_set('@font_size', font_size)
|
33
|
+
|
34
|
+
bold_node = @node.xpath('w:pPr//w:b').first
|
35
|
+
@style.instance_variable_set('@font_weight', 'bold') if bold_node
|
36
|
+
|
37
|
+
italic_node = @node.xpath('w:pPr//w:i').first
|
38
|
+
@style.instance_variable_set('@font_style', 'italic') if italic_node
|
39
|
+
|
40
|
+
underline_node = @node.xpath('w:pPr//w:u').first
|
41
|
+
@style.instance_variable_set('@text_underline_style', 'solid') if underline_node
|
42
|
+
end
|
43
|
+
|
44
|
+
def set_text_runs
|
45
|
+
@text_runs = @node.xpath('.//w:r').map do |child|
|
46
|
+
Parchment::DOCX::TextRun.new(child, self, @document)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'parchment/style'
|
2
|
+
|
3
|
+
module Parchment
|
4
|
+
module DOCX
|
5
|
+
class Style < Parchment::Style
|
6
|
+
|
7
|
+
#--
|
8
|
+
# I don't like particularly how this is set up, but the OfficeOpen
|
9
|
+
# format has styles split up between a global docDefaults and separate
|
10
|
+
# styles which relate to the ones set up in word. Unlike ODT, all
|
11
|
+
# the styles are set on paragraphs and runs individually, rather than
|
12
|
+
# referring to an embedded style. But, we still want a style Object,
|
13
|
+
# so there's two creation methods here.
|
14
|
+
#++
|
15
|
+
def initialize
|
16
|
+
end
|
17
|
+
|
18
|
+
# Creates a new Style from the XML w:style element passed in.
#
# node:: The w:style element. Its 'type' attribute becomes the Style's
#        family and its 'styleId' attribute becomes the Style's id.
#
# Returns the new Style.
#
def self.new_from_node(node)
  style = new
  # The values must be set on the new instance. Calling
  # instance_variable_set without a receiver here would store them on the
  # class and leave the returned Style with a nil id and family.
  style.instance_variable_set('@node', node)
  style.instance_variable_set('@family', node.attributes['type'].value)
  style.instance_variable_set('@id', node.attributes['styleId'].value)
  style
end
|
27
|
+
|
28
|
+
# The OfficeOpen format has a specific docDefaults block which
# describes the globals for the document. This creates a Style
# Object from that element.
#
# node:: The w:docDefaults element, or nil when the styles file has none.
#
# Returns a Style whose font_size is the document default in points, or
# nil when no default size can be found.
#
def self.new_default_style(node)
  style = new

  # Right now, only concerned about document global font size.
  #
  # OfficeOpen specifications store the font size as half-points. Meaning if
  # something is at 12 points, it will be 24. We want actual full-point size.
  #
  # A missing docDefaults element (nil node) is treated like a missing size.
  #
  font_size_tag = node && node.xpath('//w:docDefaults//w:rPrDefault//w:rPr//w:sz').first
  font_size = font_size_tag ? font_size_tag.attributes['val'].value.to_i / 2 : nil

  style.instance_variable_set('@font_size', font_size)
  style
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Parchment
|
2
|
+
module DOCX
|
3
|
+
class TextRun < Parchment::TextRun
|
4
|
+
|
5
|
+
def initialize(node, paragraph, document)
|
6
|
+
@node = node
|
7
|
+
set_style
|
8
|
+
super(paragraph, document)
|
9
|
+
end
|
10
|
+
|
11
|
+
private
|
12
|
+
|
13
|
+
# Because OfficeOpen puts all formatting on the individual
# elements rather than refer to a defined style, they need
# to be created from the element itself.
#
# Sets @style to a new Style carrying the run's font size (half-points
# converted to points), weight, slant and underline.
#
def set_style
  @style = Style.new

  size_node = @node.xpath('.//w:sz').first
  font_size = size_node ? size_node.attributes['val'].value.to_i / 2 : nil
  @style.instance_variable_set('@font_size', font_size)

  font_weight = toggle_enabled?('.//w:b') ? 'bold' : 'normal'
  @style.instance_variable_set('@font_weight', font_weight)

  font_style = toggle_enabled?('.//w:i') ? 'italic' : 'normal'
  @style.instance_variable_set('@font_style', font_style)

  # For underline, the "off" value is 'none' rather than a boolean.
  underline_style = toggle_enabled?('.//w:u', %w(none)) ? 'solid' : nil
  @style.instance_variable_set('@text_underline_style', underline_style)
end

# Whether the first element matching +path+ under the run switches its
# property on. An element without a 'val' attribute counts as on; one
# whose 'val' is listed in +off_values+ counts as off, so an explicit
# <w:b w:val="0"/> no longer renders as bold.
#
def toggle_enabled?(path, off_values = %w(0 false off))
  element = @node.xpath(path).first
  return false unless element
  val = element.attributes['val']
  val.nil? || !off_values.include?(val.value)
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'parchment/document'
|
2
|
+
require_relative 'paragraph'
|
3
|
+
require_relative 'style'
|
4
|
+
|
5
|
+
module Parchment
|
6
|
+
module ODT
|
7
|
+
class Document < Parchment::Document
|
8
|
+
|
9
|
+
private
|
10
|
+
|
11
|
+
# These methods parse and add the Document's children and defaults.
|
12
|
+
#
|
13
|
+
def set_paragraphs
|
14
|
+
set_default_paragraph_style
|
15
|
+
paragraph_nodes = @content_xml.xpath('.//office:body/office:text/text:p')
|
16
|
+
@paragraphs = paragraph_nodes.map { |node| Parchment::ODT::Paragraph.new(node, self) }
|
17
|
+
end
|
18
|
+
|
19
|
+
def set_styles
|
20
|
+
style_nodes = @content_xml.xpath('.//office:automatic-styles/style:style')
|
21
|
+
@styles = style_nodes.map { |node| Parchment::ODT::Style.new(node) }
|
22
|
+
end
|
23
|
+
|
24
|
+
def set_default_paragraph_style
|
25
|
+
style_nodes = @styles_xml.xpath('.//office:styles/style:default-style').select do |style|
|
26
|
+
style.attributes['family'].value == 'paragraph'
|
27
|
+
end
|
28
|
+
@default_paragraph_style = Parchment::ODT::Style.new(style_nodes.first)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'zip'
|
2
|
+
require_relative 'document'
|
3
|
+
|
4
|
+
module Parchment
|
5
|
+
|
6
|
+
# = Parchment OpenOffice (.odt) format parser
|
7
|
+
#
|
8
|
+
module ODT
|
9
|
+
|
10
|
+
# Opens the .odt archive at +path+ and builds a Document from it.
#
# path:: (String) Location of the .odt file.
#
# The two XML parts are read inside the block form of Zip::File.open so
# that the archive handle is closed again once they have been read,
# rather than being left open for the life of the process.
#
def self.read(path)
  document_file = nil
  styles_file = nil
  Zip::File.open(path) do |zip|
    document_file = zip.read('content.xml')
    styles_file = zip.read('styles.xml')
  end
  Parchment::ODT::Document.new(document_file, styles_file)
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'parchment/paragraph'
|
2
|
+
require_relative 'style'
|
3
|
+
require_relative 'text_run'
|
4
|
+
|
5
|
+
module Parchment
|
6
|
+
module ODT
|
7
|
+
class Paragraph < Parchment::Paragraph
|
8
|
+
|
9
|
+
def initialize(node, document)
|
10
|
+
@node = node
|
11
|
+
@style_id = @node.attributes['style-name'].value
|
12
|
+
@document = document
|
13
|
+
@style = @document.get_style_by_id(@style_id)
|
14
|
+
super()
|
15
|
+
end
|
16
|
+
|
17
|
+
private
|
18
|
+
|
19
|
+
def set_text_runs
|
20
|
+
@text_runs = @node.children.map do |child|
|
21
|
+
Parchment::ODT::TextRun.new(child, self, @document)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'parchment/style'
|
2
|
+
|
3
|
+
module Parchment
|
4
|
+
module ODT
|
5
|
+
class Style < Parchment::Style
|
6
|
+
|
7
|
+
# Because the OpenOffice standard uses 'start', 'end', etc.
|
8
|
+
ALIGNMENT_CONVERSION = {
|
9
|
+
start: :left,
|
10
|
+
end: :right,
|
11
|
+
center: :center
|
12
|
+
}
|
13
|
+
|
14
|
+
TEXT_PROPERTIES = [
|
15
|
+
'font-size',
|
16
|
+
'font-weight',
|
17
|
+
'font-style',
|
18
|
+
'text-underline-style'
|
19
|
+
]
|
20
|
+
|
21
|
+
def initialize(node)
|
22
|
+
@node = node
|
23
|
+
@node.attributes.map { |k, v| [k, v.value] }.each do |prop|
|
24
|
+
prop_name = prop[0].gsub('-', '_')
|
25
|
+
instance_variable_set("@#{prop_name}", prop[1])
|
26
|
+
end
|
27
|
+
instance_variable_set("@id", @name)
|
28
|
+
@node.children.each do |style_child|
|
29
|
+
case style_child.name
|
30
|
+
when 'paragraph-properties'
|
31
|
+
if style_child.attributes['text-align']
|
32
|
+
@text_align = ALIGNMENT_CONVERSION[style_child.attributes['text-align'].value.to_sym]
|
33
|
+
end
|
34
|
+
when 'text-properties'
|
35
|
+
TEXT_PROPERTIES.each do |prop|
|
36
|
+
style_attr = style_child.attributes[prop]
|
37
|
+
if style_attr
|
38
|
+
value = style_attr.value
|
39
|
+
value = value.to_i if prop == 'font-size'
|
40
|
+
instance_variable_set("@#{prop.gsub('-', '_')}", value)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Parchment
|
2
|
+
module ODT
|
3
|
+
class TextRun < Parchment::TextRun
|
4
|
+
|
5
|
+
# node::      The XML child node of the paragraph that this run wraps.
# paragraph:: (Paragraph) The Paragraph the run belongs to.
# document::  (Document) The Document used to look styles up by id.
#
# A run that names a style through its 'style-name' attribute uses that
# Style; any other run takes on the Paragraph's Style. This includes
# nodes that carry attributes but no 'style-name', which used to raise
# NoMethodError on nil.
#
def initialize(node, paragraph, document)
  @node = node
  style_attribute = @node.attributes['style-name']
  if style_attribute
    @style_id = style_attribute.value
    @style = document.get_style_by_id(@style_id)
  else
    @style = paragraph.style
  end
  super(paragraph, document)
end
|
15
|
+
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Parchment
|
2
|
+
|
3
|
+
# Helper methods useful to multiple classes.
|
4
|
+
#
|
5
|
+
module Helpers
|
6
|
+
|
7
|
+
# Wraps content in an HTML tag, returning the element.
# Currently used in Paragraph and TextRun for the to_html methods
#
# name:: (String) The name of the HTML tag. (e.g. 'p', 'span')
# options:: (Hash) Options that describe the element.
#
# ==== Options
# +content:+ (String) The base text content for the tag. It is inserted
# as given (not escaped), because callers nest already-built tags.
#
# +styles:+ (Hash) CSS styles and values to be applied. e.g.
# { 'font-size' => '12pt', 'text-decoration' => 'underline' }
#
# Returns the element as a String. The style attribute is left out when
# +styles+ is nil or empty.
#
def html_tag(name, options = {})
  content = options[:content]
  styles = options[:styles]

  html = "<#{name}"
  unless styles.nil? || styles.empty?
    declarations = styles.map { |property, value| "#{property}:#{value};" }.join
    html << " style=\"#{declarations}\""
  end
  html << '>'
  # to_s keeps String#<< from treating an Integer as a character codepoint.
  html << content.to_s if content
  html << "</#{name}>"
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'parchment/style'
|
2
|
+
require 'parchment/text_run'
|
3
|
+
require 'parchment/helpers'
|
4
|
+
|
5
|
+
module Parchment
|
6
|
+
|
7
|
+
# A Paragraph holds several TextRun objects and default formatting for them.
|
8
|
+
#
|
9
|
+
class Paragraph
|
10
|
+
include Parchment::Helpers
|
11
|
+
|
12
|
+
# (Style) The primary Style for the Paragraph.
|
13
|
+
attr_reader :style
|
14
|
+
|
15
|
+
# (Array) All the TextRun children that the Paragraph has.
|
16
|
+
attr_reader :text_runs
|
17
|
+
|
18
|
+
# (Integer) It's what it sounds like.
|
19
|
+
attr_reader :default_font_size
|
20
|
+
|
21
|
+
# This does not accept any arguments because the primary work
|
22
|
+
# for this is done in the formatter's subclass.
|
23
|
+
#
|
24
|
+
def initialize
|
25
|
+
raise MissingFormatterMethodError unless @node
|
26
|
+
@default_font_size = @document.default_paragraph_style.font_size
|
27
|
+
set_text_runs
|
28
|
+
end
|
29
|
+
|
30
|
+
# The font size of the Paragraph. Will return the Document's default
|
31
|
+
# font size if not defined.
|
32
|
+
#
|
33
|
+
def font_size
|
34
|
+
@style.font_size || @default_font_size
|
35
|
+
end
|
36
|
+
|
37
|
+
# This is a method constructor which creates Boolean methods (e.g. bold?,
|
38
|
+
# italic?) based on the available formatting options defined in Style.
|
39
|
+
#
|
40
|
+
Parchment::Style::AVAILABLE_FORMATTING.each do |styling|
|
41
|
+
define_method("#{styling}?") { style.public_send("#{styling}?") }
|
42
|
+
end
|
43
|
+
|
44
|
+
# Output the unformatted Paragraph's content as a String.
|
45
|
+
#
|
46
|
+
def to_s
|
47
|
+
@node.content
|
48
|
+
end
|
49
|
+
alias :text :to_s
|
50
|
+
|
51
|
+
# Return a HTML element String with formatting based on the Paragraph's
|
52
|
+
# properties.
|
53
|
+
#
|
54
|
+
def to_html
|
55
|
+
html = ''
|
56
|
+
text_runs.each { |text_run| html << text_run.to_html }
|
57
|
+
styles = { 'font-size' => "#{font_size}pt" }
|
58
|
+
styles['text-align'] = @style.text_align unless @style.aligned_left?
|
59
|
+
html_tag(:p, content: html, styles: styles)
|
60
|
+
end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
# Parses and creates the TextRun objects that belong to the Paragraph.
|
65
|
+
#
|
66
|
+
# *This needs to be defined in the formatter's Paragraph class.*
|
67
|
+
#
|
68
|
+
def set_text_runs
|
69
|
+
raise MissingFormatterMethodError
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Parchment
|
2
|
+
class Style
|
3
|
+
|
4
|
+
AVAILABLE_FORMATTING = %w(
|
5
|
+
bold
|
6
|
+
italic
|
7
|
+
underline
|
8
|
+
aligned_left
|
9
|
+
aligned_right
|
10
|
+
aligned_center
|
11
|
+
)
|
12
|
+
|
13
|
+
attr_reader :id,
|
14
|
+
:name,
|
15
|
+
:family,
|
16
|
+
:parent_style_name,
|
17
|
+
:text_align,
|
18
|
+
:font_size,
|
19
|
+
:font_weight,
|
20
|
+
:font_style,
|
21
|
+
:text_underline_style
|
22
|
+
|
23
|
+
# This needs to be defined in each format's subclass.
|
24
|
+
#
|
25
|
+
def initialize(node)
|
26
|
+
raise MissingFormatterMethodError
|
27
|
+
end
|
28
|
+
|
29
|
+
def paragraph?
|
30
|
+
@family == 'paragraph'
|
31
|
+
end
|
32
|
+
|
33
|
+
def text?
|
34
|
+
@family == 'text'
|
35
|
+
end
|
36
|
+
|
37
|
+
def bold?
|
38
|
+
@font_weight == 'bold'
|
39
|
+
end
|
40
|
+
|
41
|
+
def italic?
|
42
|
+
@font_style == 'italic'
|
43
|
+
end
|
44
|
+
|
45
|
+
def underline?
|
46
|
+
!@text_underline_style.nil?
|
47
|
+
end
|
48
|
+
|
49
|
+
def aligned_left?
|
50
|
+
[:left, nil].include?(@text_align)
|
51
|
+
end
|
52
|
+
|
53
|
+
def aligned_right?
|
54
|
+
@text_align == :right
|
55
|
+
end
|
56
|
+
|
57
|
+
def aligned_center?
|
58
|
+
@text_align == :center
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'erb'
require 'parchment/helpers'
|
2
|
+
|
3
|
+
module Parchment
|
4
|
+
|
5
|
+
# A "run" of text within a Paragraph. Each run may have its own style
|
6
|
+
# attributes different from that of the Paragraph. These are iterated
|
7
|
+
# through to generate a line of formatted output.
|
8
|
+
#
|
9
|
+
class TextRun
|
10
|
+
include Parchment::Helpers
|
11
|
+
|
12
|
+
# (Style) The primary Style for the TextRun.
|
13
|
+
attr_reader :style
|
14
|
+
|
15
|
+
def initialize(paragraph, document)
|
16
|
+
raise MissingFormatterMethodError unless @node
|
17
|
+
@content = @node.content
|
18
|
+
@default_font_size = paragraph.font_size
|
19
|
+
end
|
20
|
+
|
21
|
+
# The font size of the TextRun. Will return the Paragraph's default
|
22
|
+
# font size if not defined.
|
23
|
+
#
|
24
|
+
def font_size
|
25
|
+
@style.font_size || @default_font_size
|
26
|
+
end
|
27
|
+
|
28
|
+
# This is a method constructor which creates Boolean methods (e.g. bold?,
|
29
|
+
# italic?) based on the available formatting options defined in Style.
|
30
|
+
#
|
31
|
+
Parchment::Style::AVAILABLE_FORMATTING.each do |styling|
|
32
|
+
define_method("#{styling}?") { style.public_send("#{styling}?") }
|
33
|
+
end
|
34
|
+
|
35
|
+
# Output the unformatted TextRun's content as a String.
|
36
|
+
#
|
37
|
+
def to_s
|
38
|
+
@content
|
39
|
+
end
|
40
|
+
alias :text :to_s
|
41
|
+
|
42
|
+
# Return a HTML element String with formatting based on the TextRun's
# properties.
#
# The run's text comes straight from the source document, so it is
# HTML-escaped before any tags are wrapped around it. Italic and bold
# become <em> and <strong>; underline and a font size that differs from
# the Paragraph's become inline styles on a wrapping <span>.
#
def to_html
  html = ERB::Util.html_escape(@content)
  html = html_tag(:em, content: html) if italic?
  html = html_tag(:strong, content: html) if bold?
  styles = {}
  styles['text-decoration'] = 'underline' if underline?
  # No need to be granular with font size down to the span level if it doesn't vary.
  styles['font-size'] = "#{font_size}pt" if font_size != @default_font_size
  html = html_tag(:span, content: html, styles: styles) unless styles.empty?
  html
end
|
56
|
+
end
|
57
|
+
end
|
metadata
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: parchment
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Allen Petlock
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-05-01 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: nokogiri
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.6'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.6'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rubyzip
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ~>
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ~>
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1'
|
83
|
+
description: A simple library for reading and interacting with word processing documents,
|
84
|
+
such as ODT, DOCX, etc.
|
85
|
+
email:
|
86
|
+
- apetlock@gmail.com
|
87
|
+
executables: []
|
88
|
+
extensions: []
|
89
|
+
extra_rdoc_files: []
|
90
|
+
files:
|
91
|
+
- Rakefile
|
92
|
+
- lib/parchment/text_run.rb
|
93
|
+
- lib/parchment/formats/docx/docx.rb
|
94
|
+
- lib/parchment/formats/docx/text_run.rb
|
95
|
+
- lib/parchment/formats/docx/document.rb
|
96
|
+
- lib/parchment/formats/docx/style.rb
|
97
|
+
- lib/parchment/formats/docx/paragraph.rb
|
98
|
+
- lib/parchment/formats/odt/text_run.rb
|
99
|
+
- lib/parchment/formats/odt/document.rb
|
100
|
+
- lib/parchment/formats/odt/style.rb
|
101
|
+
- lib/parchment/formats/odt/odt.rb
|
102
|
+
- lib/parchment/formats/odt/paragraph.rb
|
103
|
+
- lib/parchment/document.rb
|
104
|
+
- lib/parchment/style.rb
|
105
|
+
- lib/parchment/version.rb
|
106
|
+
- lib/parchment/helpers.rb
|
107
|
+
- lib/parchment/paragraph.rb
|
108
|
+
- lib/parchment.rb
|
109
|
+
- README.md
|
110
|
+
- LICENSE.md
|
111
|
+
- Parchment.rdoc
|
112
|
+
homepage: https://github.com/apetlock/parchment
|
113
|
+
licenses:
|
114
|
+
- MIT
|
115
|
+
metadata: {}
|
116
|
+
post_install_message:
|
117
|
+
rdoc_options: []
|
118
|
+
require_paths:
|
119
|
+
- lib
|
120
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - '>='
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - '>='
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: '0'
|
130
|
+
requirements: []
|
131
|
+
rubyforge_project:
|
132
|
+
rubygems_version: 2.0.14
|
133
|
+
signing_key:
|
134
|
+
specification_version: 4
|
135
|
+
summary: Simple word processing document interaction
|
136
|
+
test_files: []
|