htot_conv 0.3.2 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +135 -135
- data/.travis.yml +12 -12
- data/Gemfile +4 -4
- data/LICENSE.txt +21 -21
- data/README.md +46 -138
- data/Rakefile +10 -10
- data/bin/console +14 -14
- data/bin/setup +8 -8
- data/docs/image/output_xlsx_type0.png +0 -0
- data/docs/image/output_xlsx_type1.png +0 -0
- data/docs/image/output_xlsx_type1_outline_rows_yes.png +0 -0
- data/docs/image/output_xlsx_type2.png +0 -0
- data/docs/image/output_xlsx_type2_integrate_cells_colspan.png +0 -0
- data/docs/image/output_xlsx_type2_outline_rows_yes.png +0 -0
- data/docs/image/output_xlsx_type3.png +0 -0
- data/docs/image/output_xlsx_type3_integrate_cells_both.png +0 -0
- data/docs/image/output_xlsx_type4.png +0 -0
- data/docs/image/output_xlsx_type4_integrate_cells_both.png +0 -0
- data/docs/image/output_xlsx_type5.png +0 -0
- data/docs/image/output_xlsx_type5_integrate_cells_colspan.png +0 -0
- data/docs/index.md +88 -0
- data/exe/htot_conv +8 -8
- data/htot_conv.gemspec +37 -37
- data/lib/htot_conv.rb +20 -20
- data/lib/htot_conv/cli.rb +174 -174
- data/lib/htot_conv/generator.rb +30 -30
- data/lib/htot_conv/generator/base.rb +34 -35
- data/lib/htot_conv/generator/xlsx_type0.rb +36 -24
- data/lib/htot_conv/generator/xlsx_type1.rb +57 -69
- data/lib/htot_conv/generator/xlsx_type2.rb +100 -104
- data/lib/htot_conv/generator/xlsx_type3.rb +99 -85
- data/lib/htot_conv/generator/xlsx_type4.rb +109 -84
- data/lib/htot_conv/generator/xlsx_type5.rb +75 -62
- data/lib/htot_conv/outline.rb +176 -176
- data/lib/htot_conv/parser.rb +27 -27
- data/lib/htot_conv/parser/base.rb +15 -15
- data/lib/htot_conv/parser/dir_tree.rb +54 -54
- data/lib/htot_conv/parser/html_list.rb +71 -71
- data/lib/htot_conv/parser/opml.rb +70 -70
- data/lib/htot_conv/parser/simple_text.rb +70 -70
- data/lib/htot_conv/util.rb +13 -13
- data/lib/htot_conv/version.rb +4 -4
- metadata +19 -6
@@ -1,71 +1,71 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
require 'htot_conv/parser/base'
|
3
|
-
|
4
|
-
module HTOTConv
  module Parser
    # Parser that converts nested HTML lists (<ul>/<ol> with <li> items)
    # into an HTOTConv::Outline.
    class HtmlList < Base
      # Describes the options this parser reads.
      #
      # @return [Hash] option name => { :default, :pat, :desc }
      def self.option_help
        key_header_spec = {
          :default => [],
          :pat => Array,
          :desc => "key header",
        }

        { :key_header => key_header_spec }
      end

      # Runs +input+ through Nokogiri's HTML SAX parser and collects every
      # list item found into a fresh outline.
      #
      # @param input the HTML source, passed unchanged to the SAX parser
      # @return [HTOTConv::Outline] outline whose key header is taken from
      #   @option[:key_header] and whose value header is empty
      def parse(input)
        result = HTOTConv::Outline.new
        result.key_header = @option[:key_header]
        result.value_header = []

        handler = ListDoc.new(result)
        Nokogiri::HTML::SAX::Parser.new(handler).parse(input)

        result
      end

      # SAX handler that emits one outline item per <li>.
      #
      # The level of an item is the number of <ul>/<ol> elements open at the
      # moment the item is emitted. Text is buffered from the opening <li>
      # until the item ends or a nested list begins; text that follows a
      # nested list inside the same <li> is not collected.
      class ListDoc < Nokogiri::XML::SAX::Document
        # @param outline [HTOTConv::Outline] outline that receives the items
        def initialize(outline)
          @outline = outline
          @breadcrumb = []   # names of the currently open <ul>/<ol> elements
          @li_text = nil     # text buffer of the open <li>, nil when none
        end

        def start_element(name, attrs=[])
          case name
          when "ul", "ol"
            # A nested list closes the text of the enclosing item, so emit
            # that item before descending one level.
            generate_outline_item
            @breadcrumb.push(name)
          when "li"
            # <li> outside of any list is ignored.
            @li_text = +"" unless @breadcrumb.empty?
          end
        end

        def end_element(name)
          case name
          when "ul", "ol"
            generate_outline_item
            @breadcrumb.pop
          when "li"
            generate_outline_item
          end
        end

        def characters(string)
          append_text(string)
        end

        def cdata_block(string)
          append_text(string)
        end

        private

        # Adds +string+ to the buffered item text, if an item is open.
        def append_text(string)
          @li_text << string if @li_text
        end

        # Emits the buffered item (if any) at the current nesting depth and
        # clears the buffer.
        def generate_outline_item
          return if @li_text.nil?

          @outline.add_item(@li_text.strip, @breadcrumb.length, [])
          @li_text = nil
        end
      end
    end
  end
end
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'htot_conv/parser/base'
|
3
|
+
|
4
|
+
module HTOTConv
  module Parser
    # Parser that converts nested HTML lists (<ul>/<ol> with <li> items)
    # into an HTOTConv::Outline.
    class HtmlList < Base
      # Describes the options this parser reads.
      #
      # @return [Hash] option name => { :default, :pat, :desc }
      def self.option_help
        key_header_spec = {
          :default => [],
          :pat => Array,
          :desc => "key header",
        }

        { :key_header => key_header_spec }
      end

      # Runs +input+ through Nokogiri's HTML SAX parser and collects every
      # list item found into a fresh outline.
      #
      # @param input the HTML source, passed unchanged to the SAX parser
      # @return [HTOTConv::Outline] outline whose key header is taken from
      #   @option[:key_header] and whose value header is empty
      def parse(input)
        result = HTOTConv::Outline.new
        result.key_header = @option[:key_header]
        result.value_header = []

        handler = ListDoc.new(result)
        Nokogiri::HTML::SAX::Parser.new(handler).parse(input)

        result
      end

      # SAX handler that emits one outline item per <li>.
      #
      # The level of an item is the number of <ul>/<ol> elements open at the
      # moment the item is emitted. Text is buffered from the opening <li>
      # until the item ends or a nested list begins; text that follows a
      # nested list inside the same <li> is not collected.
      class ListDoc < Nokogiri::XML::SAX::Document
        # @param outline [HTOTConv::Outline] outline that receives the items
        def initialize(outline)
          @outline = outline
          @breadcrumb = []   # names of the currently open <ul>/<ol> elements
          @li_text = nil     # text buffer of the open <li>, nil when none
        end

        def start_element(name, attrs=[])
          case name
          when "ul", "ol"
            # A nested list closes the text of the enclosing item, so emit
            # that item before descending one level.
            generate_outline_item
            @breadcrumb.push(name)
          when "li"
            # <li> outside of any list is ignored.
            @li_text = +"" unless @breadcrumb.empty?
          end
        end

        def end_element(name)
          case name
          when "ul", "ol"
            generate_outline_item
            @breadcrumb.pop
          when "li"
            generate_outline_item
          end
        end

        def characters(string)
          append_text(string)
        end

        def cdata_block(string)
          append_text(string)
        end

        private

        # Adds +string+ to the buffered item text, if an item is open.
        def append_text(string)
          @li_text << string if @li_text
        end

        # Emits the buffered item (if any) at the current nesting depth and
        # clears the buffer.
        def generate_outline_item
          return if @li_text.nil?

          @outline.add_item(@li_text.strip, @breadcrumb.length, [])
          @li_text = nil
        end
      end
    end
  end
end
|
@@ -1,70 +1,70 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
require 'htot_conv/parser/base'
|
3
|
-
|
4
|
-
module HTOTConv
  module Parser
    # Parser that converts the <outline> elements of an OPML document into
    # an HTOTConv::Outline.
    class Opml < Base
      # Describes the options this parser reads.
      #
      # @return [Hash] option name => { :default, :pat, :desc }
      def self.option_help
        key_header_spec = {
          :default => [],
          :pat => Array,
          :desc => "key header",
        }

        { :key_header => key_header_spec }
      end

      # Runs +input+ through Nokogiri's XML SAX parser and collects every
      # <outline> element into a fresh outline.
      #
      # @param input the OPML source, passed unchanged to the SAX parser
      # @return [HTOTConv::Outline] outline whose key header is taken from
      #   @option[:key_header]; its value header starts empty and grows as
      #   new attribute names are encountered
      def parse(input)
        result = HTOTConv::Outline.new
        result.key_header = @option[:key_header]
        result.value_header = []

        handler = ListDoc.new(result)
        Nokogiri::XML::SAX::Parser.new(handler).parse(input)

        result
      end

      # SAX handler that emits one outline item per <outline> element, at a
      # level equal to the number of <outline> elements open at that point
      # (the element itself included).
      class ListDoc < Nokogiri::XML::SAX::Document
        # @param outline [HTOTConv::Outline] outline that receives the items
        def initialize(outline)
          @outline = outline
          @breadcrumb = []   # one entry per currently open <outline>
        end

        def start_element(name, attrs=[])
          return unless name == "outline"

          # Push first so the element counts towards its own level.
          @breadcrumb.push(name)
          generate_outline_item(attrs)
        end

        def end_element(name)
          return unless name == "outline"

          @breadcrumb.pop
        end

        private

        # Builds an item from the attribute pairs of one <outline> element.
        # The "text" attribute becomes the item key; every other attribute
        # is stored in the value column registered under its name.
        def generate_outline_item(attrs)
          text = ""
          depth = @breadcrumb.length
          values = []

          attrs.each do |attr_name, attr_val|
            if attr_name == "text"
              text = attr_val
            else
              values[column_for(attr_name)] = attr_val
            end
          end

          @outline.add_item(text, depth, values)
        end

        # Returns the value-header index for +attr_name+, appending the name
        # to the outline's value header the first time it is seen.
        def column_for(attr_name)
          known = @outline.value_header.index(attr_name)
          return known unless known.nil?

          @outline.value_header << attr_name
          @outline.value_header.length - 1
        end
      end
    end
  end
end
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'htot_conv/parser/base'
|
3
|
+
|
4
|
+
module HTOTConv
  module Parser
    # Parser that converts the <outline> elements of an OPML document into
    # an HTOTConv::Outline.
    class Opml < Base
      # Describes the options this parser reads.
      #
      # @return [Hash] option name => { :default, :pat, :desc }
      def self.option_help
        key_header_spec = {
          :default => [],
          :pat => Array,
          :desc => "key header",
        }

        { :key_header => key_header_spec }
      end

      # Runs +input+ through Nokogiri's XML SAX parser and collects every
      # <outline> element into a fresh outline.
      #
      # @param input the OPML source, passed unchanged to the SAX parser
      # @return [HTOTConv::Outline] outline whose key header is taken from
      #   @option[:key_header]; its value header starts empty and grows as
      #   new attribute names are encountered
      def parse(input)
        result = HTOTConv::Outline.new
        result.key_header = @option[:key_header]
        result.value_header = []

        handler = ListDoc.new(result)
        Nokogiri::XML::SAX::Parser.new(handler).parse(input)

        result
      end

      # SAX handler that emits one outline item per <outline> element, at a
      # level equal to the number of <outline> elements open at that point
      # (the element itself included).
      class ListDoc < Nokogiri::XML::SAX::Document
        # @param outline [HTOTConv::Outline] outline that receives the items
        def initialize(outline)
          @outline = outline
          @breadcrumb = []   # one entry per currently open <outline>
        end

        def start_element(name, attrs=[])
          return unless name == "outline"

          # Push first so the element counts towards its own level.
          @breadcrumb.push(name)
          generate_outline_item(attrs)
        end

        def end_element(name)
          return unless name == "outline"

          @breadcrumb.pop
        end

        private

        # Builds an item from the attribute pairs of one <outline> element.
        # The "text" attribute becomes the item key; every other attribute
        # is stored in the value column registered under its name.
        def generate_outline_item(attrs)
          text = ""
          depth = @breadcrumb.length
          values = []

          attrs.each do |attr_name, attr_val|
            if attr_name == "text"
              text = attr_val
            else
              values[column_for(attr_name)] = attr_val
            end
          end

          @outline.add_item(text, depth, values)
        end

        # Returns the value-header index for +attr_name+, appending the name
        # to the outline's value header the first time it is seen.
        def column_for(attr_name)
          known = @outline.value_header.index(attr_name)
          return known unless known.nil?

          @outline.value_header << attr_name
          @outline.value_header.length - 1
        end
      end
    end
  end
end
|
@@ -1,70 +1,70 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
require 'htot_conv/parser/base'
|
3
|
-
|
4
|
-
module HTOTConv
  module Parser
    # Parser for indented plain text: one outline item per line, with the
    # level derived from the number of leading indent units.
    class SimpleText < Base
      # Describes the options this parser reads.
      #
      # @return [Hash] option name => { :default, :pat, :desc }
      def self.option_help
        {
          :indent => {
            :default => "\t",
            :pat => String,
            :desc => "indent character (default: TAB)",
          },
          :delimiter => {
            :default => nil,
            :pat => String,
            :desc => "separator character of additional data",
          },
          :preserve_empty_line => {
            :default => false,
            :pat => FalseClass,
            :desc => "preserve empty line as a level-1 item (default: no)",
          },
          :key_header => {
            :default => [],
            :pat => Array,
            :desc => "key header",
          },
          :value_header => {
            :default => [],
            :pat => Array,
            :desc => "value header",
          },
        }
      end

      # Converts each line of +input+ into an outline item.
      #
      # The level is 1 plus the number of leading repetitions of
      # @option[:indent]; a nil or empty indent option puts every line at
      # level 1. When @option[:delimiter] is set (a String, matched
      # literally, or any other truthy pattern, used as given), the stripped
      # line is split on it: the first field is the key and the remaining
      # fields are the values.
      #
      # @param input object responding to #each_line (e.g. String or IO)
      # @return [HTOTConv::Outline] the populated outline
      def parse(input)
        indent = @option[:indent].to_s
        # Build the indent pattern only when there is an indent unit; the
        # original escaped the option unconditionally and raised TypeError
        # for a nil indent.
        indent_regexp = indent.empty? ? nil : /\A(?<indents>(?:#{Regexp.escape(indent)})*)/

        delimiter = @option[:delimiter]
        delimiter_regexp = delimiter.kind_of?(String) ? Regexp.new(Regexp.escape(delimiter)) : delimiter

        outline = HTOTConv::Outline.new
        outline.key_header = @option[:key_header]
        outline.value_header = @option[:value_header]

        input.each_line do |line|
          next if line.chomp.empty? && !@option[:preserve_empty_line]

          level = 1
          if indent_regexp
            indents = indent_regexp.match(line)[:indents]
            level += indents.length / indent.length
            line = line.sub(indent_regexp, "")
          end

          line = line.strip
          if delimiter_regexp
            # Split once. A line that yields no fields (e.g. a preserved
            # empty line) gives a nil key and no values, as before.
            key, *value = line.split(delimiter_regexp)
          else
            key = line
            value = []
          end

          outline.add_item(key, level, value)
        end

        outline
      end
    end
  end
end
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'htot_conv/parser/base'
|
3
|
+
|
4
|
+
module HTOTConv
  module Parser
    # Parser for indented plain text: one outline item per line, with the
    # level derived from the number of leading indent units.
    class SimpleText < Base
      # Describes the options this parser reads.
      #
      # @return [Hash] option name => { :default, :pat, :desc }
      def self.option_help
        {
          :indent => {
            :default => "\t",
            :pat => String,
            :desc => "indent character (default: TAB)",
          },
          :delimiter => {
            :default => nil,
            :pat => String,
            :desc => "separator character of additional data",
          },
          :preserve_empty_line => {
            :default => false,
            :pat => FalseClass,
            :desc => "preserve empty line as a level-1 item (default: no)",
          },
          :key_header => {
            :default => [],
            :pat => Array,
            :desc => "key header",
          },
          :value_header => {
            :default => [],
            :pat => Array,
            :desc => "value header",
          },
        }
      end

      # Converts each line of +input+ into an outline item.
      #
      # The level is 1 plus the number of leading repetitions of
      # @option[:indent]; a nil or empty indent option puts every line at
      # level 1. When @option[:delimiter] is set (a String, matched
      # literally, or any other truthy pattern, used as given), the stripped
      # line is split on it: the first field is the key and the remaining
      # fields are the values.
      #
      # @param input object responding to #each_line (e.g. String or IO)
      # @return [HTOTConv::Outline] the populated outline
      def parse(input)
        indent = @option[:indent].to_s
        # Build the indent pattern only when there is an indent unit; the
        # original escaped the option unconditionally and raised TypeError
        # for a nil indent.
        indent_regexp = indent.empty? ? nil : /\A(?<indents>(?:#{Regexp.escape(indent)})*)/

        delimiter = @option[:delimiter]
        delimiter_regexp = delimiter.kind_of?(String) ? Regexp.new(Regexp.escape(delimiter)) : delimiter

        outline = HTOTConv::Outline.new
        outline.key_header = @option[:key_header]
        outline.value_header = @option[:value_header]

        input.each_line do |line|
          next if line.chomp.empty? && !@option[:preserve_empty_line]

          level = 1
          if indent_regexp
            indents = indent_regexp.match(line)[:indents]
            level += indents.length / indent.length
            line = line.sub(indent_regexp, "")
          end

          line = line.strip
          if delimiter_regexp
            # Split once. A line that yields no fields (e.g. a preserved
            # empty line) gives a nil key and no values, as before.
            key, *value = line.split(delimiter_regexp)
          else
            key = line
            value = []
          end

          outline.add_item(key, level, value)
        end

        outline
      end
    end
  end
end
|
data/lib/htot_conv/util.rb
CHANGED
@@ -1,13 +1,13 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module HTOTConv
  # Stateless helper functions.
  module Util
    module_function

    # Pads +array+ in place with +pad+ until it holds +length+ elements.
    #
    # @param array [Array] array to extend; it is mutated
    # @param length [Integer] target number of elements
    # @param pad [Object] filler value (the same object fills every new slot)
    # @return [Array] the same +array+ object, now +length+ elements long
    # @raise [ArgumentError] if +array+ is not an Array, or already holds
    #   more than +length+ elements
    def pad_array(array, length, pad=nil)
      unless array.is_a?(Array)
        raise ArgumentError, "array is not an array"
      end

      current = array.length
      if current > length
        raise ArgumentError, "array length #{current} is larger than #{length}"
      end

      filler = Array.new(length - current, pad)
      array.concat(filler)
    end
  end
end
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HTOTConv
  # Stateless helper functions.
  module Util
    module_function

    # Pads +array+ in place with +pad+ until it holds +length+ elements.
    #
    # @param array [Array] array to extend; it is mutated
    # @param length [Integer] target number of elements
    # @param pad [Object] filler value (the same object fills every new slot)
    # @return [Array] the same +array+ object, now +length+ elements long
    # @raise [ArgumentError] if +array+ is not an Array, or already holds
    #   more than +length+ elements
    def pad_array(array, length, pad=nil)
      unless array.is_a?(Array)
        raise ArgumentError, "array is not an array"
      end

      current = array.length
      if current > length
        raise ArgumentError, "array length #{current} is larger than #{length}"
      end

      filler = Array.new(length - current, pad)
      array.concat(filler)
    end
  end
end
|