htot_conv 0.3.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +135 -135
- data/.travis.yml +12 -12
- data/Gemfile +4 -4
- data/LICENSE.txt +21 -21
- data/README.md +46 -138
- data/Rakefile +10 -10
- data/bin/console +14 -14
- data/bin/setup +8 -8
- data/docs/image/output_xlsx_type0.png +0 -0
- data/docs/image/output_xlsx_type1.png +0 -0
- data/docs/image/output_xlsx_type1_outline_rows_yes.png +0 -0
- data/docs/image/output_xlsx_type2.png +0 -0
- data/docs/image/output_xlsx_type2_integrate_cells_colspan.png +0 -0
- data/docs/image/output_xlsx_type2_outline_rows_yes.png +0 -0
- data/docs/image/output_xlsx_type3.png +0 -0
- data/docs/image/output_xlsx_type3_integrate_cells_both.png +0 -0
- data/docs/image/output_xlsx_type4.png +0 -0
- data/docs/image/output_xlsx_type4_integrate_cells_both.png +0 -0
- data/docs/image/output_xlsx_type5.png +0 -0
- data/docs/image/output_xlsx_type5_integrate_cells_colspan.png +0 -0
- data/docs/index.md +88 -0
- data/exe/htot_conv +8 -8
- data/htot_conv.gemspec +37 -37
- data/lib/htot_conv.rb +20 -20
- data/lib/htot_conv/cli.rb +174 -174
- data/lib/htot_conv/generator.rb +30 -30
- data/lib/htot_conv/generator/base.rb +34 -35
- data/lib/htot_conv/generator/xlsx_type0.rb +36 -24
- data/lib/htot_conv/generator/xlsx_type1.rb +57 -69
- data/lib/htot_conv/generator/xlsx_type2.rb +100 -104
- data/lib/htot_conv/generator/xlsx_type3.rb +99 -85
- data/lib/htot_conv/generator/xlsx_type4.rb +109 -84
- data/lib/htot_conv/generator/xlsx_type5.rb +75 -62
- data/lib/htot_conv/outline.rb +176 -176
- data/lib/htot_conv/parser.rb +27 -27
- data/lib/htot_conv/parser/base.rb +15 -15
- data/lib/htot_conv/parser/dir_tree.rb +54 -54
- data/lib/htot_conv/parser/html_list.rb +71 -71
- data/lib/htot_conv/parser/opml.rb +70 -70
- data/lib/htot_conv/parser/simple_text.rb +70 -70
- data/lib/htot_conv/util.rb +13 -13
- data/lib/htot_conv/version.rb +4 -4
- metadata +19 -6
@@ -1,71 +1,71 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
require 'htot_conv/parser/base'
|
3
|
-
|
4
|
-
module HTOTConv
|
5
|
-
module Parser
|
6
|
-
class HtmlList < Base
|
7
|
-
def self.option_help
|
8
|
-
{
|
9
|
-
:key_header => {
|
10
|
-
:default => [],
|
11
|
-
:pat => Array,
|
12
|
-
:desc => "key header",
|
13
|
-
},
|
14
|
-
}
|
15
|
-
end
|
16
|
-
|
17
|
-
def parse(input)
|
18
|
-
outline = HTOTConv::Outline.new
|
19
|
-
outline.key_header = @option[:key_header]
|
20
|
-
outline.value_header = []
|
21
|
-
|
22
|
-
parser = Nokogiri::HTML::SAX::Parser.new(ListDoc.new(outline))
|
23
|
-
parser.parse(input)
|
24
|
-
|
25
|
-
outline
|
26
|
-
end
|
27
|
-
|
28
|
-
class ListDoc < Nokogiri::XML::SAX::Document
|
29
|
-
def initialize(outline)
|
30
|
-
@outline = outline
|
31
|
-
@breadcrumb = []
|
32
|
-
@li_text = nil
|
33
|
-
end
|
34
|
-
|
35
|
-
def start_element(name, attrs=[])
|
36
|
-
if ((name == "ul") || (name == "ol"))
|
37
|
-
generate_outline_item unless @li_text.nil?
|
38
|
-
@breadcrumb << name
|
39
|
-
elsif name == "li"
|
40
|
-
@li_text = "".dup if @breadcrumb.length > 0
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
def end_element(name)
|
45
|
-
if ((name == "ul") || (name == "ol"))
|
46
|
-
generate_outline_item unless @li_text.nil?
|
47
|
-
@breadcrumb.pop
|
48
|
-
elsif name == "li"
|
49
|
-
generate_outline_item unless @li_text.nil?
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
def characters(string)
|
54
|
-
@li_text << string unless @li_text.nil?
|
55
|
-
end
|
56
|
-
|
57
|
-
def cdata_block(string)
|
58
|
-
@li_text << string unless @li_text.nil?
|
59
|
-
end
|
60
|
-
|
61
|
-
private
|
62
|
-
def generate_outline_item
|
63
|
-
level = @breadcrumb.length
|
64
|
-
@outline.add_item(@li_text.strip, level, [])
|
65
|
-
@li_text = nil
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'htot_conv/parser/base'
|
3
|
+
|
4
|
+
module HTOTConv
|
5
|
+
module Parser
|
6
|
+
class HtmlList < Base
|
7
|
+
def self.option_help
|
8
|
+
{
|
9
|
+
:key_header => {
|
10
|
+
:default => [],
|
11
|
+
:pat => Array,
|
12
|
+
:desc => "key header",
|
13
|
+
},
|
14
|
+
}
|
15
|
+
end
|
16
|
+
|
17
|
+
def parse(input)
|
18
|
+
outline = HTOTConv::Outline.new
|
19
|
+
outline.key_header = @option[:key_header]
|
20
|
+
outline.value_header = []
|
21
|
+
|
22
|
+
parser = Nokogiri::HTML::SAX::Parser.new(ListDoc.new(outline))
|
23
|
+
parser.parse(input)
|
24
|
+
|
25
|
+
outline
|
26
|
+
end
|
27
|
+
|
28
|
+
class ListDoc < Nokogiri::XML::SAX::Document
|
29
|
+
def initialize(outline)
|
30
|
+
@outline = outline
|
31
|
+
@breadcrumb = []
|
32
|
+
@li_text = nil
|
33
|
+
end
|
34
|
+
|
35
|
+
def start_element(name, attrs=[])
|
36
|
+
if ((name == "ul") || (name == "ol"))
|
37
|
+
generate_outline_item unless @li_text.nil?
|
38
|
+
@breadcrumb << name
|
39
|
+
elsif name == "li"
|
40
|
+
@li_text = "".dup if @breadcrumb.length > 0
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def end_element(name)
|
45
|
+
if ((name == "ul") || (name == "ol"))
|
46
|
+
generate_outline_item unless @li_text.nil?
|
47
|
+
@breadcrumb.pop
|
48
|
+
elsif name == "li"
|
49
|
+
generate_outline_item unless @li_text.nil?
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def characters(string)
|
54
|
+
@li_text << string unless @li_text.nil?
|
55
|
+
end
|
56
|
+
|
57
|
+
def cdata_block(string)
|
58
|
+
@li_text << string unless @li_text.nil?
|
59
|
+
end
|
60
|
+
|
61
|
+
private
|
62
|
+
def generate_outline_item
|
63
|
+
level = @breadcrumb.length
|
64
|
+
@outline.add_item(@li_text.strip, level, [])
|
65
|
+
@li_text = nil
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
@@ -1,70 +1,70 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
require 'htot_conv/parser/base'
|
3
|
-
|
4
|
-
module HTOTConv
|
5
|
-
module Parser
|
6
|
-
class Opml < Base
|
7
|
-
def self.option_help
|
8
|
-
{
|
9
|
-
:key_header => {
|
10
|
-
:default => [],
|
11
|
-
:pat => Array,
|
12
|
-
:desc => "key header",
|
13
|
-
},
|
14
|
-
}
|
15
|
-
end
|
16
|
-
|
17
|
-
def parse(input)
|
18
|
-
outline = HTOTConv::Outline.new
|
19
|
-
outline.key_header = @option[:key_header]
|
20
|
-
outline.value_header = []
|
21
|
-
|
22
|
-
parser = Nokogiri::XML::SAX::Parser.new(ListDoc.new(outline))
|
23
|
-
parser.parse(input)
|
24
|
-
|
25
|
-
outline
|
26
|
-
end
|
27
|
-
|
28
|
-
class ListDoc < Nokogiri::XML::SAX::Document
|
29
|
-
def initialize(outline)
|
30
|
-
@outline = outline
|
31
|
-
@breadcrumb = []
|
32
|
-
end
|
33
|
-
|
34
|
-
def start_element(name, attrs=[])
|
35
|
-
if (name == "outline")
|
36
|
-
@breadcrumb << name
|
37
|
-
generate_outline_item(attrs)
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
def end_element(name)
|
42
|
-
@breadcrumb.pop if (name == "outline")
|
43
|
-
end
|
44
|
-
|
45
|
-
private
|
46
|
-
def generate_outline_item(attrs)
|
47
|
-
text = ""
|
48
|
-
level = @breadcrumb.length
|
49
|
-
values = []
|
50
|
-
attrs.each do |pair|
|
51
|
-
attr_name, attr_val = pair
|
52
|
-
if attr_name == "text"
|
53
|
-
text = attr_val
|
54
|
-
else
|
55
|
-
unless @outline.value_header.include?(attr_name)
|
56
|
-
@outline.value_header << attr_name
|
57
|
-
values[@outline.value_header.length - 1] = attr_val
|
58
|
-
else
|
59
|
-
values[@outline.value_header.index(attr_name)] = attr_val
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
@outline.add_item(text, level, values)
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'htot_conv/parser/base'
|
3
|
+
|
4
|
+
module HTOTConv
|
5
|
+
module Parser
|
6
|
+
class Opml < Base
|
7
|
+
def self.option_help
|
8
|
+
{
|
9
|
+
:key_header => {
|
10
|
+
:default => [],
|
11
|
+
:pat => Array,
|
12
|
+
:desc => "key header",
|
13
|
+
},
|
14
|
+
}
|
15
|
+
end
|
16
|
+
|
17
|
+
def parse(input)
|
18
|
+
outline = HTOTConv::Outline.new
|
19
|
+
outline.key_header = @option[:key_header]
|
20
|
+
outline.value_header = []
|
21
|
+
|
22
|
+
parser = Nokogiri::XML::SAX::Parser.new(ListDoc.new(outline))
|
23
|
+
parser.parse(input)
|
24
|
+
|
25
|
+
outline
|
26
|
+
end
|
27
|
+
|
28
|
+
class ListDoc < Nokogiri::XML::SAX::Document
|
29
|
+
def initialize(outline)
|
30
|
+
@outline = outline
|
31
|
+
@breadcrumb = []
|
32
|
+
end
|
33
|
+
|
34
|
+
def start_element(name, attrs=[])
|
35
|
+
if (name == "outline")
|
36
|
+
@breadcrumb << name
|
37
|
+
generate_outline_item(attrs)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def end_element(name)
|
42
|
+
@breadcrumb.pop if (name == "outline")
|
43
|
+
end
|
44
|
+
|
45
|
+
private
|
46
|
+
def generate_outline_item(attrs)
|
47
|
+
text = ""
|
48
|
+
level = @breadcrumb.length
|
49
|
+
values = []
|
50
|
+
attrs.each do |pair|
|
51
|
+
attr_name, attr_val = pair
|
52
|
+
if attr_name == "text"
|
53
|
+
text = attr_val
|
54
|
+
else
|
55
|
+
unless @outline.value_header.include?(attr_name)
|
56
|
+
@outline.value_header << attr_name
|
57
|
+
values[@outline.value_header.length - 1] = attr_val
|
58
|
+
else
|
59
|
+
values[@outline.value_header.index(attr_name)] = attr_val
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
@outline.add_item(text, level, values)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
@@ -1,70 +1,70 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
require 'htot_conv/parser/base'
|
3
|
-
|
4
|
-
module HTOTConv
|
5
|
-
module Parser
|
6
|
-
class SimpleText < Base
|
7
|
-
def self.option_help
|
8
|
-
{
|
9
|
-
:indent => {
|
10
|
-
:default => "\t",
|
11
|
-
:pat => String,
|
12
|
-
:desc => "indent character (default: TAB)",
|
13
|
-
},
|
14
|
-
:delimiter => {
|
15
|
-
:default => nil,
|
16
|
-
:pat => String,
|
17
|
-
:desc => "separator character of additional data",
|
18
|
-
},
|
19
|
-
:preserve_empty_line => {
|
20
|
-
:default => false,
|
21
|
-
:pat => FalseClass,
|
22
|
-
:desc => "preserve empty line as a level-1 item (default: no)",
|
23
|
-
},
|
24
|
-
:key_header => {
|
25
|
-
:default => [],
|
26
|
-
:pat => Array,
|
27
|
-
:desc => "key header",
|
28
|
-
},
|
29
|
-
:value_header => {
|
30
|
-
:default => [],
|
31
|
-
:pat => Array,
|
32
|
-
:desc => "value header",
|
33
|
-
},
|
34
|
-
}
|
35
|
-
end
|
36
|
-
|
37
|
-
def parse(input)
|
38
|
-
indent_regexp = Regexp.new("^(?<indents>(#{Regexp.escape(@option[:indent])})*)")
|
39
|
-
delimiter_regexp = (@option[:delimiter].kind_of?(String))? Regexp.new(Regexp.escape(@option[:delimiter])) : @option[:delimiter]
|
40
|
-
outline = HTOTConv::Outline.new
|
41
|
-
outline.key_header = @option[:key_header]
|
42
|
-
outline.value_header = @option[:value_header]
|
43
|
-
|
44
|
-
input.each_line do |line|
|
45
|
-
next if ((line.chomp == "") && (!@option[:preserve_empty_line]))
|
46
|
-
|
47
|
-
level = 1
|
48
|
-
value = []
|
49
|
-
if (@option[:indent] || '').length > 0
|
50
|
-
indents = indent_regexp.match(line)[:indents]
|
51
|
-
level = 1 + indents.length / @option[:indent].length
|
52
|
-
line = line.sub(indent_regexp, "")
|
53
|
-
end
|
54
|
-
|
55
|
-
line = line.strip
|
56
|
-
if delimiter_regexp
|
57
|
-
key = line.split(delimiter_regexp)[0]
|
58
|
-
value = line.split(delimiter_regexp)[1..-1] || []
|
59
|
-
else
|
60
|
-
key = line
|
61
|
-
end
|
62
|
-
|
63
|
-
outline.add_item(key, level, value)
|
64
|
-
end
|
65
|
-
|
66
|
-
outline
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'htot_conv/parser/base'
|
3
|
+
|
4
|
+
module HTOTConv
|
5
|
+
module Parser
|
6
|
+
class SimpleText < Base
|
7
|
+
def self.option_help
|
8
|
+
{
|
9
|
+
:indent => {
|
10
|
+
:default => "\t",
|
11
|
+
:pat => String,
|
12
|
+
:desc => "indent character (default: TAB)",
|
13
|
+
},
|
14
|
+
:delimiter => {
|
15
|
+
:default => nil,
|
16
|
+
:pat => String,
|
17
|
+
:desc => "separator character of additional data",
|
18
|
+
},
|
19
|
+
:preserve_empty_line => {
|
20
|
+
:default => false,
|
21
|
+
:pat => FalseClass,
|
22
|
+
:desc => "preserve empty line as a level-1 item (default: no)",
|
23
|
+
},
|
24
|
+
:key_header => {
|
25
|
+
:default => [],
|
26
|
+
:pat => Array,
|
27
|
+
:desc => "key header",
|
28
|
+
},
|
29
|
+
:value_header => {
|
30
|
+
:default => [],
|
31
|
+
:pat => Array,
|
32
|
+
:desc => "value header",
|
33
|
+
},
|
34
|
+
}
|
35
|
+
end
|
36
|
+
|
37
|
+
def parse(input)
|
38
|
+
indent_regexp = Regexp.new("^(?<indents>(#{Regexp.escape(@option[:indent])})*)")
|
39
|
+
delimiter_regexp = (@option[:delimiter].kind_of?(String))? Regexp.new(Regexp.escape(@option[:delimiter])) : @option[:delimiter]
|
40
|
+
outline = HTOTConv::Outline.new
|
41
|
+
outline.key_header = @option[:key_header]
|
42
|
+
outline.value_header = @option[:value_header]
|
43
|
+
|
44
|
+
input.each_line do |line|
|
45
|
+
next if ((line.chomp == "") && (!@option[:preserve_empty_line]))
|
46
|
+
|
47
|
+
level = 1
|
48
|
+
value = []
|
49
|
+
if (@option[:indent] || '').length > 0
|
50
|
+
indents = indent_regexp.match(line)[:indents]
|
51
|
+
level = 1 + indents.length / @option[:indent].length
|
52
|
+
line = line.sub(indent_regexp, "")
|
53
|
+
end
|
54
|
+
|
55
|
+
line = line.strip
|
56
|
+
if delimiter_regexp
|
57
|
+
key = line.split(delimiter_regexp)[0]
|
58
|
+
value = line.split(delimiter_regexp)[1..-1] || []
|
59
|
+
else
|
60
|
+
key = line
|
61
|
+
end
|
62
|
+
|
63
|
+
outline.add_item(key, level, value)
|
64
|
+
end
|
65
|
+
|
66
|
+
outline
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
data/lib/htot_conv/util.rb
CHANGED
@@ -1,13 +1,13 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module HTOTConv
|
4
|
-
module Util
|
5
|
-
def pad_array(array, length, pad=nil)
|
6
|
-
raise ArgumentError, "array is not an array" unless array.kind_of?(Array)
|
7
|
-
raise ArgumentError, "array length #{array.length} is larger than #{length}" if array.length > length
|
8
|
-
|
9
|
-
array.concat(Array.new(length - array.length, pad))
|
10
|
-
end
|
11
|
-
module_function :pad_array
|
12
|
-
end
|
13
|
-
end
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HTOTConv
|
4
|
+
module Util
|
5
|
+
def pad_array(array, length, pad=nil)
|
6
|
+
raise ArgumentError, "array is not an array" unless array.kind_of?(Array)
|
7
|
+
raise ArgumentError, "array length #{array.length} is larger than #{length}" if array.length > length
|
8
|
+
|
9
|
+
array.concat(Array.new(length - array.length, pad))
|
10
|
+
end
|
11
|
+
module_function :pad_array
|
12
|
+
end
|
13
|
+
end
|