notroff 0.2.13 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/yesroff +30 -0
- data/lib/notroff/type_assigner.rb +1 -1
- data/lib/yesroff.rb +3 -0
- data/lib/yesroff/nr_writer.rb +51 -0
- data/lib/yesroff/odt_parser.rb +154 -0
- data/lib/yesroff/text.rb +110 -0
- metadata +8 -2
data/bin/yesroff
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'pp'
|
3
|
+
require "yesroff"
|
4
|
+
|
5
|
+
# Command-line entry point: expects exactly one argument, the path of the
# ODT file to convert.
#
# `abort` prints the usage line to stderr and exits with a non-zero status,
# instead of dumping the backtrace that an uncaught exception would produce.
abort "Usage: yesroff <<odt_file>>" unless ARGV.size == 1

# Load the document, build the paragraph tree, then render every paragraph
# through the parser's writer.
parser = OdtParser.new(ARGV[0])
parser.parse
parser.render
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
|
29
|
+
|
30
|
+
|
@@ -21,7 +21,7 @@ class TypeAssigner
|
|
21
21
|
processed_paragraphs << paragraph
|
22
22
|
end
|
23
23
|
|
24
|
-
current_type = :body if [ :section, :title, :code1 ].include?(type)
|
24
|
+
current_type = :body if [ :section, :sec, :c1, :subsec, :title, :code1 ].include?(type)
|
25
25
|
end
|
26
26
|
processed_paragraphs
|
27
27
|
end
|
data/lib/yesroff/nr_writer.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# Emits NotRoff markup one paragraph at a time.
#
# The writer remembers the current paragraph style so that a style directive
# (".<style>") is only printed when the style actually changes. Output goes to
# the IO passed to +new+; when none is passed, each write goes to whatever
# $stdout is at the time of the call.
class NRWriter
  attr_reader :para_style

  # out - optional object responding to +print+ and +puts+ that receives the
  #       markup. nil (the default) means "write to $stdout".
  def initialize(out = nil)
    @out = out
    @para_style = :body
    @single_line = false
  end

  # Prints the ".<new_style>" directive unless +new_style+ is already the
  # current style. For a single-line style the directive is followed by a
  # space (the paragraph text continues on the same line); otherwise it is
  # followed by a newline.
  def switch_para_style(new_style, single_line)
    return if new_style == @para_style

    out.print ".#{new_style}"
    out.print(single_line ? ' ' : "\n")
    @para_style = new_style
    @single_line = single_line
  end

  # Terminates the current paragraph's line. A single-line style applies to
  # one paragraph only, so the writer falls back to :body afterwards. Any other
  # style except :code and :listing gets an extra blank line after it.
  def end_paragraph
    out.puts
    if @single_line
      @para_style = :body
      @single_line = false
    elsif @para_style != :code && @para_style != :listing
      out.puts
    end
  end

  # Prints +n+ space characters.
  def indent(n)
    out.print(' ' * n)
  end

  # Prints the "!!" marker.
  def toggle_bold
    out.print "!!"
  end

  # Prints the "~~" marker.
  def toggle_italic
    out.print "~~"
  end

  # Prints the "@@" marker.
  def toggle_code
    out.print '@@'
  end

  # Prints +t+ as-is, with no trailing newline.
  def text(t)
    out.print t
  end

  private

  # Destination for all output. Resolved on every call so that a later
  # reassignment of $stdout is honoured when no explicit IO was given.
  def out
    @out || $stdout
  end
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
require 'rexml/document'
|
2
|
+
require 'pp'
|
3
|
+
require 'zip/zipfilesystem'
|
4
|
+
|
5
|
+
# Reads the content.xml of an ODT file and turns its paragraphs into a tree of
# Paragraph / Span / Text objects that can be rendered through an NRWriter.
class OdtParser
  # Opens the zip archive at +odt_path+, parses its "content.xml" entry with
  # REXML and seeds the style tables with the built-in defaults.
  def initialize(odt_path)
    Zip::ZipFile.open(odt_path) do |zipfile|
      zipfile.file.open("content.xml") do |content|
        @doc = REXML::Document.new(content.read)
      end
    end

    @writer = NRWriter.new
    @paras = []
    @text_styles = default_text_styles
    @para_styles = default_para_styles
  end

  # Returns the built-in text styles as a Hash keyed by style name. Only "CD1"
  # is flagged as code; the others carry no formatting flags.
  def default_text_styles
    cd1 = TextStyle.new("CD1")
    cd1.code = true

    styles = [
      cd1,
      TextStyle.new("Default"),
      TextStyle.new("C1"),
      TextStyle.new("C1_20_HD"),
      TextStyle.new("FN"),
      TextStyle.new("Base_20_Font"),
      TextStyle.new("Chapter_20_Word")
    ]
    index_by_name(styles)
  end

  # Returns the built-in paragraph styles as a Hash keyed by style name. Each
  # entry is (document style name, parent, NotRoff style name, single_line).
  def default_para_styles
    styles = [
      ParagraphStyle.new('FT', nil, :body, false),
      ParagraphStyle.new('IT', nil, :body, false),
      ParagraphStyle.new('Quotation', nil, :quote, true),
      ParagraphStyle.new('CDT1', nil, :code, false),
      ParagraphStyle.new('CDT', nil, :code, false),
      ParagraphStyle.new('HA', nil, :title, true),
      ParagraphStyle.new('HB', nil, :subtitle, true),
      ParagraphStyle.new('HC', nil, :sec, true),
      ParagraphStyle.new('HD', nil, :subsec, true),
      ParagraphStyle.new('LH', nil, :ltitle, true),
      ParagraphStyle.new('LC', nil, :listing, false),
      ParagraphStyle.new('LC2', nil, :listing, false),
      ParagraphStyle.new('LX', nil, :listing, false),
      ParagraphStyle.new('C1', nil, :c1, true),
      ParagraphStyle.new('BL1', nil, :bullet, true),
      ParagraphStyle.new('BL', nil, :bullet, true),
      ParagraphStyle.new('BX', nil, :bullet, true),
      ParagraphStyle.new('Quotation_20_Attribution', nil, :attribution, true),
      ParagraphStyle.new('CDTX', nil, :code, false)
    ]
    index_by_name(styles)
  end

  # Collects the document's styles first (paragraphs refer to them by name),
  # then parses every paragraph into @paras.
  def parse
    parse_text_styles
    parse_paragraph_styles
    @paras = parse_paragraphs
  end

  # Renders every parsed paragraph through the writer.
  def render
    @paras.each { |p| p.render(@writer) }
  end

  # Registers every text-family style found in the document, overriding a
  # default of the same name. A style is bold when fo:font-weight is "bold";
  # it is italic when its font name contains "italic" (any case) or when
  # fo:font-style is "italic".
  def parse_text_styles
    styles = REXML::XPath.match(@doc, "//style:style[@style:family='text']")
    styles.each do |s|
      style = TextStyle.new(s.attributes['style:name'])
      props = REXML::XPath.first(s, "./style:text-properties")
      if props
        style.bold = (props.attributes['fo:font-weight'] == 'bold')
        style.italic = (/italic/i =~ props.attributes['style:font-name']) ||
          (props.attributes['fo:font-style'] == 'italic')
      end
      @text_styles[style.name] = style
    end
  end

  # Registers every paragraph-family style found in the document, linking each
  # one to its parent style.
  #
  # Raises RuntimeError (via lookup_para_style) when the parent is not already
  # known, which includes a style that declares no parent at all.
  def parse_paragraph_styles
    styles = REXML::XPath.match(@doc, "//style:style[@style:family='paragraph']")
    styles.each do |s|
      attrs = s.attributes
      style = ParagraphStyle.new(attrs['style:name'])
      style.parent = lookup_para_style(attrs['parent-style-name'])
      @para_styles[style.name] = style
    end
  end

  # Returns one Paragraph per text:p element, in the order the XPath match
  # yields them.
  def parse_paragraphs
    REXML::XPath.match(@doc, '//text:p').map { |p| parse_paragraph(p) }
  end

  # Returns the paragraph style registered under +name+.
  # Raises RuntimeError when there is none.
  def lookup_para_style(name)
    s = @para_styles[name]
    raise "No such para style #{name}" unless s
    s
  end

  # Returns the text style registered under +name+; a nil or empty name
  # selects the "Default" style. Raises RuntimeError when there is none.
  def lookup_text_style(name)
    name = "Default" if name.nil? || name.empty?
    s = @text_styles[name]
    raise "No such text style [[#{name}]]" unless s
    s
  end

  # Builds a Paragraph from a text:p element, using the paragraph style named
  # by its text:style-name attribute.
  def parse_paragraph(p)
    style = lookup_para_style(p.attributes['text:style-name'])

    para = Paragraph.new(style)
    para.contents = parse_contents(para, p)
    para
  end

  # Builds a Span from an element nested inside a paragraph (or inside another
  # span), recursing into its children.
  def parse_span(el)
    span = Span.new(lookup_text_style(el.attributes['text:style-name']))
    span.indent = space_count(el)
    span.contents = parse_contents(span, el)
    span
  end

  # Converts the children of +el+ into Text and Span objects.
  #
  # contents - the container being filled; not used here, kept so existing
  #            callers keep working.
  # el       - the REXML element whose children are converted.
  def parse_contents(contents, el)
    results = []
    el.each_child do |kid|
      case kid
      when REXML::Text
        # REXML::Text#value is already unescaped. Unescaping it a second time
        # would corrupt text that literally contains an entity such as "&lt;".
        results << Text.new(kid.value)
      when REXML::Element
        results << parse_span(kid)
      end
      # Other node kinds (comments, processing instructions) have no
      # attributes to look a style up with and carry no text, so they are
      # skipped.
    end
    results
  end

  private

  # Builds a name => style Hash from a list of styles.
  def index_by_name(styles)
    styles.each_with_object({}) { |s, hash| hash[s.name] = s }
  end

  # Number of spaces an element stands for: its text:c attribute when present,
  # otherwise 1 for a bare <text:s/> (ODF treats a missing count on text:s as
  # a single space) and 0 for every other element.
  def space_count(el)
    count = el.attributes['text:c']
    return count.to_i if count
    el.expanded_name == 'text:s' ? 1 : 0
  end
end
|
data/lib/yesroff/text.rb
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
|
2
|
+
# A named style that may inherit from a parent style.
class Style
  attr_accessor :name, :parent

  # name   - the style's name.
  # parent - the style this one derives from, or nil for a root style.
  def initialize(name, parent = nil)
    @parent = parent
    @name = name
  end

  # True when +name+ is this style's own name or the name of one of its
  # ancestors; false once the chain of parents runs out.
  def child_of?(name)
    if name == @name
      true
    elsif @parent
      @parent.child_of?(name)
    else
      false
    end
  end

  # The root of the parent chain: this style itself when it has no parent.
  def base_style
    @parent ? @parent.base_style : self
  end
end
|
22
|
+
|
23
|
+
# Character-level style carrying three independent flags: bold, italic, code.
class TextStyle < Style
  attr_accessor :bold, :italic, :code

  # Debug representation: the style name immediately followed by one letter
  # per set flag (B, I, C), wrapped in "[Style: ...]".
  def to_s
    letters = [[@bold, 'B'], [@italic, 'I'], [@code, 'C']].map do |flag, letter|
      flag ? letter : ''
    end
    "[Style: #{name}#{letters.join}]"
  end

  # Asks the writer to toggle each marker whose flag is set, always in the
  # order bold, code, italic.
  # NOTE(review): Span calls this both before and after its contents, so the
  # closing markers come out in the same order as the opening ones rather than
  # mirrored -- confirm that this is what the NotRoff format expects.
  def render(w)
    toggles = [[@bold, :toggle_bold], [@code, :toggle_code], [@italic, :toggle_italic]]
    toggles.each { |flag, toggle| w.public_send(toggle) if flag }
  end
end
|
40
|
+
|
41
|
+
|
42
|
+
# Paragraph-level style: pairs a document style name with the NotRoff style
# name to emit and a flag saying whether that style is a single-line one.
class ParagraphStyle < Style
  attr_accessor :nr_name, :single_line

  # name        - the style's name.
  # parent      - the style this one derives from, or nil.
  # nr_name     - the NotRoff style name handed to the writer.
  # single_line - flag handed to the writer along with nr_name.
  def initialize(name, parent=nil, nr_name='', single_line=true)
    super(name, parent)
    @single_line = single_line
    @nr_name = nr_name
  end

  # Switches the writer to the NotRoff style of the root of this style's
  # parent chain; this style's own nr_name is only used when it has no parent.
  def render(w)
    root = base_style
    w.switch_para_style(root.nr_name, root.single_line)
  end
end
|
56
|
+
|
57
|
+
# A styled node holding an ordered list of child nodes.
class Container
  attr_accessor :style, :parent, :contents

  # style    - the style applied to this node.
  # parent   - the enclosing node, or nil.
  # contents - the initial list of children; a fresh empty Array is created
  #            for each call that omits it.
  def initialize(style, parent=nil, contents=[])
    @style = style
    # The parent argument used to be accepted and then dropped, leaving
    # #parent nil no matter what the caller passed.
    @parent = parent
    @contents = contents
  end

  # Appends +content+ to the children.
  def <<(content)
    @contents << content
  end

  # Renders every child, in order, through the writer +w+.
  def render(w)
    contents.each { |c| c.render(w) }
  end
end
|
76
|
+
|
77
|
+
# A run of inline content with its own text style and an optional indent.
class Span < Container
  attr_accessor :indent

  # Emits the style's markers, then the indent (when one is set), then the
  # children, then the style's markers a second time.
  def render(w)
    style.render(w)
    pad = @indent
    w.indent(pad) if pad
    super
    style.render(w)
  end
end
|
87
|
+
|
88
|
+
# A paragraph: a container whose style is a paragraph style.
class Paragraph < Container
  # Lets the paragraph style act on the writer, renders the children, then
  # tells the writer that the paragraph is finished.
  def render(w)
    style.render(w)
    super
    w.end_paragraph
  end
end
|
95
|
+
|
96
|
+
# Leaf node wrapping a piece of text.
class Text
  attr_accessor :text

  # t - the wrapped text.
  def initialize(t)
    @text = t
  end

  # Hands the wrapped text to the writer.
  def render(w)
    w.text(@text)
  end

  # Debug representation showing the class of the wrapped value and the value
  # itself.
  def to_s
    kind = text.class
    "{{Text: #{kind} : #{text}}}"
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: notroff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.13
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,13 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-09-03 00:00:00.000000000Z
|
13
13
|
dependencies: []
|
14
14
|
description: NotRoff A simple text to openoffice filter
|
15
15
|
email: russ@russolsen.com
|
16
16
|
executables:
|
17
17
|
- notroff
|
18
|
+
- yesroff
|
18
19
|
extensions: []
|
19
20
|
extra_rdoc_files: []
|
20
21
|
files:
|
@@ -58,7 +59,12 @@ files:
|
|
58
59
|
- lib/notroff/type_assigner.rb
|
59
60
|
- lib/notroff/type_refiner.rb
|
60
61
|
- lib/notroff.rb
|
62
|
+
- lib/yesroff/nr_writer.rb
|
63
|
+
- lib/yesroff/odt_parser.rb
|
64
|
+
- lib/yesroff/text.rb
|
65
|
+
- lib/yesroff.rb
|
61
66
|
- bin/notroff
|
67
|
+
- bin/yesroff
|
62
68
|
homepage: http://www.russolsen.com
|
63
69
|
licenses: []
|
64
70
|
post_install_message:
|