notroff 0.2.13 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/yesroff +30 -0
- data/lib/notroff/type_assigner.rb +1 -1
- data/lib/yesroff.rb +3 -0
- data/lib/yesroff/nr_writer.rb +51 -0
- data/lib/yesroff/odt_parser.rb +154 -0
- data/lib/yesroff/text.rb +110 -0
- metadata +8 -2
data/bin/yesroff
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'pp'
|
3
|
+
require "yesroff"
|
4
|
+
|
5
|
+
# Command-line entry point: expects exactly one argument, the path of the
# .odt file to convert. The document is parsed and then rendered through
# the parser's writer.
#
# A wrong argument count is a usage error, not a program fault, so report it
# with `abort` (message on stderr, exit status 1) instead of raising, which
# would bury the usage line in a backtrace.
abort "Usage: yesroff <<odt_file>>" unless ARGV.size == 1

parser = OdtParser.new(ARGV[0])
parser.parse
parser.render
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
|
29
|
+
|
30
|
+
|
@@ -21,7 +21,7 @@ class TypeAssigner
|
|
21
21
|
processed_paragraphs << paragraph
|
22
22
|
end
|
23
23
|
|
24
|
-
current_type = :body if [ :section, :title, :code1 ].include?(type)
|
24
|
+
current_type = :body if [ :section, :sec, :c1, :subsec, :title, :code1 ].include?(type)
|
25
25
|
end
|
26
26
|
processed_paragraphs
|
27
27
|
end
|
data/lib/yesroff.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# Emits NotRoff markup one piece at a time: paragraph style tags, inline
# style markers and text.
#
# Output goes to the IO object handed to the constructor. When none is given
# everything is written to whatever $stdout is at the time of each call.
class NRWriter
  # Paragraph styles whose paragraphs are NOT followed by a blank line.
  TIGHT_STYLES = [:code, :listing].freeze

  # The paragraph style currently in effect (a symbol such as :body).
  attr_reader :para_style

  # out - optional IO-like object responding to #print and #puts;
  #       nil (the default) means "use $stdout".
  def initialize(out = nil)
    @out = out
    @para_style = :body
    @single_line = false
  end

  # Announces a change of paragraph style by printing ".<style>".
  # Nothing is printed when new_style is already the current style.
  #
  # new_style   - style name to switch to.
  # single_line - when true the tag is followed by a space, so the paragraph
  #               text continues on the tag's own line; otherwise the tag is
  #               followed by a newline.
  def switch_para_style(new_style, single_line)
    return if new_style == @para_style

    out.print ".#{new_style}"
    out.print(single_line ? ' ' : "\n")
    @para_style = new_style
    @single_line = single_line
  end

  # Terminates the current paragraph. A single-line style only lasts for one
  # paragraph, so the writer falls back to :body afterwards. Any other style
  # except the ones in TIGHT_STYLES gets an extra blank line.
  def end_paragraph
    out.puts
    if @single_line
      @para_style = :body
      @single_line = false
    elsif !TIGHT_STYLES.include?(@para_style)
      out.puts
    end
  end

  # Prints n spaces.
  def indent(n)
    out.print(' ' * n)
  end

  # Prints the bold marker.
  def toggle_bold
    out.print "!!"
  end

  # Prints the italic marker.
  def toggle_italic
    out.print "~~"
  end

  # Prints the inline-code marker.
  def toggle_code
    out.print '@@'
  end

  # Prints t as-is.
  def text(t)
    out.print t
  end

  private

  # Resolved on every call so that a later reassignment of $stdout is
  # honoured, exactly as with a bare Kernel#print.
  def out
    @out || $stdout
  end
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
require 'rexml/document'
|
2
|
+
require 'pp'
|
3
|
+
require 'zip/zipfilesystem'
|
4
|
+
|
5
|
+
# Reads the content.xml of an OpenDocument text file, collects its text and
# paragraph styles, and turns every text:p element into a Paragraph that can
# be rendered through an NRWriter.
class OdtParser
  # odt_path - path of the .odt (zip) file; its "content.xml" entry is read
  #            and parsed into a REXML document.
  def initialize(odt_path)
    Zip::ZipFile.open(odt_path) do |zipfile|
      zipfile.file.open("content.xml") do |content|
        @doc = REXML::Document.new(content.read)
      end
    end

    @writer = NRWriter.new
    @paras = []
    @text_styles = default_text_styles
    @para_styles = default_para_styles
  end

  # Built-in text styles, keyed by style name. Only "CD1" is flagged as code.
  def default_text_styles
    cd1 = TextStyle.new("CD1")
    cd1.code = true

    index_by_name([
      cd1,
      TextStyle.new("Default"),
      TextStyle.new("C1"),
      TextStyle.new("C1_20_HD"),
      TextStyle.new("FN"),
      TextStyle.new("Base_20_Font"),
      TextStyle.new("Chapter_20_Word")
    ])
  end

  # Built-in paragraph styles, keyed by style name. Each entry pairs a
  # document style name with the NotRoff style it is written as and a flag
  # saying whether that NotRoff style is a single-line one.
  def default_para_styles
    index_by_name([
      ParagraphStyle.new('FT', nil, :body, false),
      ParagraphStyle.new('IT', nil, :body, false),
      ParagraphStyle.new('Quotation', nil, :quote, true),
      ParagraphStyle.new('CDT1', nil, :code, false),
      ParagraphStyle.new('CDT', nil, :code, false),
      ParagraphStyle.new('HA', nil, :title, true),
      ParagraphStyle.new('HB', nil, :subtitle, true),
      ParagraphStyle.new('HC', nil, :sec, true),
      ParagraphStyle.new('HD', nil, :subsec, true),
      ParagraphStyle.new('LH', nil, :ltitle, true),
      ParagraphStyle.new('LC', nil, :listing, false),
      ParagraphStyle.new('LC2', nil, :listing, false),
      ParagraphStyle.new('LX', nil, :listing, false),
      ParagraphStyle.new('C1', nil, :c1, true),
      ParagraphStyle.new('BL1', nil, :bullet, true),
      ParagraphStyle.new('BL', nil, :bullet, true),
      ParagraphStyle.new('BX', nil, :bullet, true),
      ParagraphStyle.new('Quotation_20_Attribution', nil, :attribution, true),
      ParagraphStyle.new('CDTX', nil, :code, false)
    ])
  end

  # Loads the document's styles, then its paragraphs. Styles come first
  # because paragraph parsing looks them up by name.
  def parse
    parse_text_styles
    parse_paragraph_styles
    @paras = parse_paragraphs
  end

  # Renders every parsed paragraph through the writer.
  def render
    @paras.each { |para| para.render(@writer) }
  end

  # Registers every text style declared in the document. A style is bold
  # when fo:font-weight is "bold"; italic when the font name contains
  # "italic" (any case) or fo:font-style is "italic".
  def parse_text_styles
    styles = REXML::XPath.match(@doc, "//style:style[@style:family='text']")
    styles.each do |s|
      style = TextStyle.new(s.attributes['style:name'])
      props = REXML::XPath.first(s, "./style:text-properties")
      if props
        style.bold = (props.attributes['fo:font-weight'] == 'bold')
        style.italic = (/italic/i =~ props.attributes['style:font-name']) ||
                       (props.attributes['fo:font-style'] == 'italic')
      end
      @text_styles[style.name] = style
    end
  end

  # Registers every paragraph style declared in the document, linked to its
  # parent style. Raises (via lookup_para_style) when the parent is unknown.
  def parse_paragraph_styles
    styles = REXML::XPath.match(@doc, "//style:style[@style:family='paragraph']")
    styles.each do |s|
      attrs = s.attributes
      style = ParagraphStyle.new(attrs['style:name'])
      style.parent = lookup_para_style(attrs['parent-style-name'])
      @para_styles[style.name] = style
    end
  end

  # Returns one Paragraph per text:p element, in document order.
  def parse_paragraphs
    REXML::XPath.match(@doc, '//text:p').map { |p| parse_paragraph(p) }
  end

  # Returns the paragraph style registered under name.
  # Raises RuntimeError when there is none.
  def lookup_para_style(name)
    s = @para_styles[name]
    raise "No such para style #{name}" unless s
    s
  end

  # Returns the text style registered under name; a nil or empty name means
  # the "Default" style. Raises RuntimeError when the style is unknown.
  def lookup_text_style(name)
    name = "Default" if name.nil? || name.empty?
    s = @text_styles[name]
    raise "No such text style [[#{name}]]" unless s
    s
  end

  # Builds a Paragraph from a text:p element.
  def parse_paragraph(p)
    style = lookup_para_style(p.attributes['text:style-name'])

    para = Paragraph.new(style)
    para.contents = parse_contents(para, p)
    para
  end

  # Builds a Span from a non-text child element. The text:c attribute gives
  # the span's indent; a missing attribute yields 0 because nil.to_i is 0.
  def parse_span(el)
    attrs = el.attributes
    span = Span.new(lookup_text_style(attrs['text:style-name']))
    span.indent = attrs['text:c'].to_i
    span.contents = parse_contents(span, el)
    span
  end

  # Converts the children of el into Text and Span objects.
  # The first parameter is unused; it is kept so existing callers still work.
  def parse_contents(contents, el)
    el.children.map do |kid|
      case kid
      when REXML::Text
        # REXML::Text#value is already entity-decoded. Decoding it a second
        # time would turn a literal "&lt;" in the document into "<".
        Text.new(kid.value)
      else
        parse_span(kid)
      end
    end
  end

  private

  # Builds a name => style hash from a list of styles.
  def index_by_name(styles)
    styles.each_with_object({}) { |style, index| index[style.name] = style }
  end
end
|
data/lib/yesroff/text.rb
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
|
2
|
+
# A named style that may inherit from a parent style.
class Style
  attr_accessor :name, :parent

  # name   - the style's name.
  # parent - the style this one derives from, or nil for a root style.
  def initialize(name, parent = nil)
    @name = name
    @parent = parent
  end

  # True when this style, or any style up its parent chain, is called name.
  def child_of?(name)
    if name == @name
      true
    elsif @parent
      @parent.child_of?(name)
    else
      false
    end
  end

  # The style at the top of the parent chain (self when there is no parent).
  def base_style
    @parent ? @parent.base_style : self
  end
end
|
22
|
+
|
23
|
+
# A character-level style with bold, italic and code flags.
class TextStyle < Style
  attr_accessor :bold, :italic, :code

  # Debug representation: the style name followed by one letter per set flag
  # (B, I, C - in that order).
  def to_s
    flags = [[@bold, 'B'], [@italic, 'I'], [@code, 'C']]
    letters = flags.select { |on, _| on }.map { |_, letter| letter }.join
    "[Style: #{name}#{letters}]"
  end

  # Emits one toggle per set flag on the writer w. The markers are toggles,
  # so a caller invokes this once before and once after the styled content.
  def render(w)
    w.toggle_bold if bold
    w.toggle_code if code
    w.toggle_italic if italic
  end
end
|
40
|
+
|
41
|
+
|
42
|
+
# A paragraph-level style. It carries the style name to write out (nr_name)
# and a flag saying whether that style is a single-line one.
class ParagraphStyle < Style
  attr_accessor :nr_name, :single_line

  # name        - the style's name.
  # parent      - parent style, or nil.
  # nr_name     - name handed to the writer when this style is rendered.
  # single_line - single-line flag handed to the writer.
  def initialize(name, parent=nil, nr_name='', single_line=true)
    super(name, parent)
    @nr_name, @single_line = nr_name, single_line
  end

  # Switches the writer w to the settings of this style's base style, i.e.
  # the style at the top of the parent chain.
  def render(w)
    root = base_style
    w.switch_para_style(root.nr_name, root.single_line)
  end
end
|
56
|
+
|
57
|
+
# A styled node holding an ordered list of renderable children.
class Container
  attr_accessor :style, :parent, :contents

  # style    - the style applied to this node.
  # parent   - the enclosing node, or nil.
  # contents - initial children; each must respond to #render(writer).
  def initialize(style, parent=nil, contents=[])
    @style = style
    # Store the parent: it was accepted but silently dropped before, so the
    # parent accessor always returned nil.
    @parent = parent
    @contents = contents
  end

  # Appends a child; returns the contents array.
  def <<(content)
    @contents << content
  end

  # Renders every child, in order, on the writer w.
  def render(w)
    contents.each { |c| c.render(w) }
  end
end
|
76
|
+
|
77
|
+
# An inline run of content with a text style and an optional indent.
class Span < Container
  attr_accessor :indent

  # Renders the style before and after the contents, so that toggle-type
  # markers are emitted on both sides. The indent, when set, is written
  # right after the first style rendering.
  def render(w)
    style.render(w)
    w.indent(indent) if indent
    super
    style.render(w)
  end
end
|
87
|
+
|
88
|
+
# A block-level container: one paragraph of the document.
class Paragraph < Container
  # Renders the paragraph style, then the children, then tells the writer w
  # that the paragraph is finished.
  def render(w)
    style.render(w)
    super
    w.end_paragraph
  end
end
|
95
|
+
|
96
|
+
# A leaf node wrapping a piece of text.
class Text
  attr_accessor :text

  # t - the wrapped text.
  def initialize(t)
    @text = t
  end

  # Hands the wrapped text to the writer w.
  def render(w)
    w.text(text)
  end

  # Debug representation showing the class of the wrapped value and the
  # value itself.
  def to_s
    kind = text.class
    "{{Text: #{kind} : #{text}}}"
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: notroff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.13
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,13 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-09-03 00:00:00.000000000Z
|
13
13
|
dependencies: []
|
14
14
|
description: NotRoff A simple text to openoffice filter
|
15
15
|
email: russ@russolsen.com
|
16
16
|
executables:
|
17
17
|
- notroff
|
18
|
+
- yesroff
|
18
19
|
extensions: []
|
19
20
|
extra_rdoc_files: []
|
20
21
|
files:
|
@@ -58,7 +59,12 @@ files:
|
|
58
59
|
- lib/notroff/type_assigner.rb
|
59
60
|
- lib/notroff/type_refiner.rb
|
60
61
|
- lib/notroff.rb
|
62
|
+
- lib/yesroff/nr_writer.rb
|
63
|
+
- lib/yesroff/odt_parser.rb
|
64
|
+
- lib/yesroff/text.rb
|
65
|
+
- lib/yesroff.rb
|
61
66
|
- bin/notroff
|
67
|
+
- bin/yesroff
|
62
68
|
homepage: http://www.russolsen.com
|
63
69
|
licenses: []
|
64
70
|
post_install_message:
|