coradoc 1.1.5 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. checksums.yaml +4 -4
  2. data/.envrc +1 -0
  3. data/.irbrc +1 -0
  4. data/.rspec +3 -0
  5. data/.rubocop.yml +5 -1
  6. data/.rubocop_todo.yml +179 -0
  7. data/Gemfile +11 -0
  8. data/README.adoc +5 -7
  9. data/coradoc.gemspec +5 -16
  10. data/exe/reverse_adoc +1 -1
  11. data/exe/w2a +1 -1
  12. data/flake.lock +114 -0
  13. data/flake.nix +135 -0
  14. data/lib/coradoc/cli.rb +1 -1
  15. data/lib/coradoc/converter.rb +4 -5
  16. data/lib/coradoc/element/attribute.rb +10 -1
  17. data/lib/coradoc/element/attribute_list.rb +4 -3
  18. data/lib/coradoc/element/audio.rb +1 -1
  19. data/lib/coradoc/element/author.rb +2 -2
  20. data/lib/coradoc/element/base.rb +14 -2
  21. data/lib/coradoc/element/bibliography.rb +1 -1
  22. data/lib/coradoc/element/bibliography_entry.rb +1 -1
  23. data/lib/coradoc/element/block/open.rb +1 -1
  24. data/lib/coradoc/element/block.rb +1 -1
  25. data/lib/coradoc/element/document_attributes.rb +8 -2
  26. data/lib/coradoc/element/image/block_image.rb +3 -2
  27. data/lib/coradoc/element/image/core.rb +5 -4
  28. data/lib/coradoc/element/inline/attribute_reference.rb +19 -0
  29. data/lib/coradoc/element/inline/cross_reference.rb +4 -3
  30. data/lib/coradoc/element/inline/footnote.rb +24 -0
  31. data/lib/coradoc/element/inline/small.rb +19 -0
  32. data/lib/coradoc/element/inline/span.rb +37 -0
  33. data/lib/coradoc/element/inline/underline.rb +19 -0
  34. data/lib/coradoc/element/inline.rb +5 -1
  35. data/lib/coradoc/element/list/core.rb +2 -2
  36. data/lib/coradoc/element/list/ordered.rb +1 -0
  37. data/lib/coradoc/element/list/unordered.rb +1 -0
  38. data/lib/coradoc/element/list_item.rb +19 -20
  39. data/lib/coradoc/element/table.rb +4 -2
  40. data/lib/coradoc/element/term.rb +1 -0
  41. data/lib/coradoc/element/text_element.rb +4 -1
  42. data/lib/coradoc/element/title.rb +1 -1
  43. data/lib/coradoc/element/video.rb +2 -2
  44. data/lib/coradoc/input/adoc.rb +20 -18
  45. data/lib/coradoc/input/docx.rb +25 -23
  46. data/lib/coradoc/input/html/README.adoc +1 -1
  47. data/lib/coradoc/input/html/cleaner.rb +121 -117
  48. data/lib/coradoc/input/html/config.rb +58 -56
  49. data/lib/coradoc/input/html/converters/a.rb +44 -39
  50. data/lib/coradoc/input/html/converters/aside.rb +12 -8
  51. data/lib/coradoc/input/html/converters/audio.rb +24 -20
  52. data/lib/coradoc/input/html/converters/base.rb +103 -99
  53. data/lib/coradoc/input/html/converters/blockquote.rb +18 -14
  54. data/lib/coradoc/input/html/converters/br.rb +11 -7
  55. data/lib/coradoc/input/html/converters/bypass.rb +77 -73
  56. data/lib/coradoc/input/html/converters/code.rb +18 -14
  57. data/lib/coradoc/input/html/converters/div.rb +15 -11
  58. data/lib/coradoc/input/html/converters/dl.rb +51 -44
  59. data/lib/coradoc/input/html/converters/drop.rb +21 -17
  60. data/lib/coradoc/input/html/converters/em.rb +16 -12
  61. data/lib/coradoc/input/html/converters/figure.rb +19 -15
  62. data/lib/coradoc/input/html/converters/h.rb +32 -30
  63. data/lib/coradoc/input/html/converters/head.rb +17 -13
  64. data/lib/coradoc/input/html/converters/hr.rb +11 -7
  65. data/lib/coradoc/input/html/converters/ignore.rb +15 -11
  66. data/lib/coradoc/input/html/converters/img.rb +98 -93
  67. data/lib/coradoc/input/html/converters/li.rb +13 -9
  68. data/lib/coradoc/input/html/converters/mark.rb +14 -10
  69. data/lib/coradoc/input/html/converters/markup.rb +22 -18
  70. data/lib/coradoc/input/html/converters/math.rb +26 -19
  71. data/lib/coradoc/input/html/converters/ol.rb +55 -50
  72. data/lib/coradoc/input/html/converters/p.rb +16 -12
  73. data/lib/coradoc/input/html/converters/pass_through.rb +12 -8
  74. data/lib/coradoc/input/html/converters/pre.rb +49 -45
  75. data/lib/coradoc/input/html/converters/q.rb +12 -8
  76. data/lib/coradoc/input/html/converters/strong.rb +15 -11
  77. data/lib/coradoc/input/html/converters/sub.rb +15 -11
  78. data/lib/coradoc/input/html/converters/sup.rb +15 -11
  79. data/lib/coradoc/input/html/converters/table.rb +21 -13
  80. data/lib/coradoc/input/html/converters/td.rb +64 -60
  81. data/lib/coradoc/input/html/converters/text.rb +24 -20
  82. data/lib/coradoc/input/html/converters/th.rb +13 -9
  83. data/lib/coradoc/input/html/converters/tr.rb +17 -13
  84. data/lib/coradoc/input/html/converters/video.rb +24 -20
  85. data/lib/coradoc/input/html/converters.rb +45 -43
  86. data/lib/coradoc/input/html/errors.rb +8 -6
  87. data/lib/coradoc/input/html/html_converter.rb +93 -90
  88. data/lib/coradoc/input/html/plugin.rb +104 -104
  89. data/lib/coradoc/input/html/plugins/plateau.rb +197 -190
  90. data/lib/coradoc/input/html/postprocessor.rb +188 -182
  91. data/lib/coradoc/input/html.rb +34 -32
  92. data/lib/coradoc/oscal.rb +18 -5
  93. data/lib/coradoc/output/adoc.rb +13 -11
  94. data/lib/coradoc/output/coradoc_tree_debug.rb +15 -13
  95. data/lib/coradoc/parser/asciidoc/admonition.rb +6 -6
  96. data/lib/coradoc/parser/asciidoc/attribute_list.rb +43 -27
  97. data/lib/coradoc/parser/asciidoc/base.rb +3 -6
  98. data/lib/coradoc/parser/asciidoc/bibliography.rb +5 -6
  99. data/lib/coradoc/parser/asciidoc/block.rb +30 -31
  100. data/lib/coradoc/parser/asciidoc/citation.rb +11 -29
  101. data/lib/coradoc/parser/asciidoc/content.rb +23 -33
  102. data/lib/coradoc/parser/asciidoc/document_attributes.rb +2 -3
  103. data/lib/coradoc/parser/asciidoc/header.rb +1 -2
  104. data/lib/coradoc/parser/asciidoc/inline.rb +165 -42
  105. data/lib/coradoc/parser/asciidoc/list.rb +27 -27
  106. data/lib/coradoc/parser/asciidoc/paragraph.rb +28 -19
  107. data/lib/coradoc/parser/asciidoc/section.rb +11 -17
  108. data/lib/coradoc/parser/asciidoc/table.rb +5 -5
  109. data/lib/coradoc/parser/asciidoc/term.rb +24 -8
  110. data/lib/coradoc/parser/asciidoc/text.rb +18 -21
  111. data/lib/coradoc/parser/base.rb +0 -3
  112. data/lib/coradoc/reverse_adoc.rb +3 -3
  113. data/lib/coradoc/transformer.rb +167 -137
  114. data/lib/coradoc/version.rb +1 -1
  115. data/lib/reverse_adoc.rb +1 -1
  116. data/utils/inspect_asciidoc.rb +29 -0
  117. data/utils/parser_analyzer.rb +14 -14
  118. data/utils/round_trip.rb +31 -15
  119. metadata +34 -137
  120. data/.hound.yml +0 -5
  121. data/lib/coradoc/element/inline/citation.rb +0 -24
  122. data/todo.md +0 -10
@@ -1,131 +1,131 @@
1
- module Coradoc::Input::HTML
2
- class Plugin
3
- #### Plugin system general
4
-
5
- # Allow building plugins with a shorthand syntax:
6
- # plugin = Coradoc::Input::HTML::Plugin.new do
7
- # def name = "Test"
8
- # end
9
-
10
- def self.new(&block)
11
- if self == Plugin
12
- Class.new(Plugin, &block)
13
- else
14
- super
15
- end
16
- end
17
-
18
- def initialize
19
- @html_tree_hooks_pre = {}
20
- @html_tree_hooks_post = {}
21
- end
1
+ module Coradoc
2
+ module Input
3
+ module Html
4
+ class Plugin
5
+ #### Plugin system general
6
+
7
+ # Allow building plugins with a shorthand syntax:
8
+ # plugin = Coradoc::Input::Html::Plugin.new do
9
+ # def name = "Test"
10
+ # end
11
+
12
+ def self.new(&)
13
+ if self == Plugin
14
+ Class.new(Plugin, &)
15
+ else
16
+ super
17
+ end
18
+ end
22
19
 
23
- # define name to name a Plugin
24
- def name
25
- self.class.name
26
- end
20
+ def initialize
21
+ @html_tree_hooks_pre = {}
22
+ @html_tree_hooks_post = {}
23
+ end
27
24
 
28
- #### HTML Tree functionalities
25
+ # define name to name a Plugin
26
+ def name
27
+ self.class.name
28
+ end
29
29
 
30
- attr_accessor :html_tree
30
+ #### HTML Tree functionalities
31
31
 
32
- def html_tree_change_tag_name_by_css(css, new_name)
33
- html_tree.css(css).each do |e|
34
- e.name = new_name
35
- end
36
- end
32
+ attr_accessor :html_tree, :coradoc_tree, :asciidoc_string
37
33
 
38
- def html_tree_change_properties_by_css(css, properties)
39
- html_tree.css(css).each do |e|
40
- properties.each do |k,v|
41
- e[k.to_s] = v
34
+ def html_tree_change_tag_name_by_css(css, new_name)
35
+ html_tree.css(css).each do |e|
36
+ e.name = new_name
37
+ end
42
38
  end
43
- end
44
- end
45
-
46
- def html_tree_remove_by_css(css)
47
- html_tree.css(css).each(&:remove)
48
- end
49
39
 
50
- def html_tree_replace_with_children_by_css(css)
51
- html_tree.css(css).each do |e|
52
- e.replace(e.children)
53
- end
54
- end
40
+ def html_tree_change_properties_by_css(css, properties)
41
+ html_tree.css(css).each do |e|
42
+ properties.each do |k, v|
43
+ e[k.to_s] = v
44
+ end
45
+ end
46
+ end
55
47
 
56
- def html_tree_process_to_coradoc(tree, state = {})
57
- Coradoc::Input::HTML::Converters.process_coradoc(tree, state)
58
- end
48
+ def html_tree_remove_by_css(css)
49
+ html_tree.css(css).each(&:remove)
50
+ end
59
51
 
60
- def html_tree_process_to_adoc(tree, state = {})
61
- Coradoc::Input::HTML::Converters.process(tree, state)
62
- end
52
+ def html_tree_replace_with_children_by_css(css)
53
+ html_tree.css(css).each do |e|
54
+ e.replace(e.children)
55
+ end
56
+ end
63
57
 
64
- def html_tree_preview
65
- Tempfile.open(%w"coradoc .html") do |i|
66
- i << html_tree.to_html
67
- system "chromium-browser", "--no-sandbox", i.path
68
- end
69
- end
58
+ def html_tree_process_to_coradoc(tree, state = {})
59
+ Coradoc::Input::Html::Converters.process_coradoc(tree, state)
60
+ end
70
61
 
71
- # define preprocess_html_tree to process HTML trees
62
+ def html_tree_process_to_adoc(tree, state = {})
63
+ Coradoc::Input::Html::Converters.process(tree, state)
64
+ end
72
65
 
73
- # Creates a hook to be called instead of converting an element
74
- # to a Coradoc node.
75
- #
76
- # proc |html_node, state|
77
- # coradoc_node
78
- # end
79
- def html_tree_add_hook_pre(element, &block)
80
- @html_tree_hooks_pre[element] = block
81
- end
66
+ def html_tree_preview
67
+ Tempfile.open(%w"coradoc .html") do |i|
68
+ i << html_tree.to_html
69
+ system "chromium-browser", "--no-sandbox", i.path
70
+ end
71
+ end
82
72
 
83
- def html_tree_add_hook_pre_by_css(css, &block)
84
- html_tree.css(css).each do |e|
85
- html_tree_add_hook_pre(e, &block)
86
- end
87
- end
73
+ # define preprocess_html_tree to process HTML trees
88
74
 
89
- # Creates a hook to be called after converting an element
90
- # to a Coradoc node.
91
- #
92
- # proc |html_node, coradoc_node, state|
93
- # coradoc_node
94
- # end
95
- def html_tree_add_hook_post(element, &block)
96
- @html_tree_hooks_post[element] = block
97
- end
75
+ # Creates a hook to be called instead of converting an element
76
+ # to a Coradoc node.
77
+ #
78
+ # proc |html_node, state|
79
+ # coradoc_node
80
+ # end
81
+ def html_tree_add_hook_pre(element, &block)
82
+ @html_tree_hooks_pre[element] = block
83
+ end
98
84
 
99
- def html_tree_add_hook_post_by_css(css, &block)
100
- html_tree.css(css).each do |e|
101
- html_tree_add_hook_post(e, &block)
102
- end
103
- end
85
+ def html_tree_add_hook_pre_by_css(css, &block)
86
+ html_tree.css(css).each do |e|
87
+ html_tree_add_hook_pre(e, &block)
88
+ end
89
+ end
104
90
 
105
- def html_tree_run_hooks(node, state, &_block)
106
- hook_pre = @html_tree_hooks_pre[node]
107
- hook_post = @html_tree_hooks_post[node]
91
+ # Creates a hook to be called after converting an element
92
+ # to a Coradoc node.
93
+ #
94
+ # proc |html_node, coradoc_node, state|
95
+ # coradoc_node
96
+ # end
97
+ def html_tree_add_hook_post(element, &block)
98
+ @html_tree_hooks_post[element] = block
99
+ end
108
100
 
109
- coradoc = hook_pre.(node, state) if hook_pre
110
- coradoc ||= yield node, state
101
+ def html_tree_add_hook_post_by_css(css, &block)
102
+ html_tree.css(css).each do |e|
103
+ html_tree_add_hook_post(e, &block)
104
+ end
105
+ end
111
106
 
112
- if hook_post
113
- coradoc = hook_post.(node, coradoc, state)
114
- end
107
+ def html_tree_run_hooks(node, state, &_block)
108
+ hook_pre = @html_tree_hooks_pre[node]
109
+ hook_post = @html_tree_hooks_post[node]
115
110
 
116
- coradoc
117
- end
111
+ coradoc = hook_pre.(node, state) if hook_pre
112
+ coradoc ||= yield node, state
118
113
 
119
- #### Coradoc tree functionalities
114
+ if hook_post
115
+ coradoc = hook_post.(node, coradoc, state)
116
+ end
120
117
 
121
- attr_accessor :coradoc_tree
118
+ coradoc
119
+ end
122
120
 
123
- # define postprocess_coradoc_tree to change coradoc tree
121
+ #### Coradoc tree functionalities
124
122
 
125
- #### AsciiDoc string functionalities
123
+ # define postprocess_coradoc_tree to change coradoc tree
126
124
 
127
- attr_accessor :asciidoc_string
125
+ #### AsciiDoc string functionalities
128
126
 
129
- # define postprocess_asciidoc_string to change the coradoc string
127
+ # define postprocess_asciidoc_string to change the AsciiDoc string
128
+ end
129
+ end
130
130
  end
131
131
  end
@@ -1,206 +1,213 @@
1
- module Coradoc::Input::HTML
2
- class Plugin
3
- # This plugin enhances documents from the PLATEAU project
4
- # to extract more data.
5
- #
6
- # Usage:
7
- # reverse_adoc -rcoradoc/input/html/plugins/plateau
8
- # --external-images -u raise --output _out/index.adoc index.html
9
- class Plateau < Plugin
10
- def name
11
- "PLATEAU"
12
- end
13
-
14
- def preprocess_html_tree
15
- # Let's simplify the tree by removing what's extraneous
16
- # html_tree_remove_by_css("script, style, img.container_imagebox:not([src])")
17
- # html_tree_replace_with_children_by_css("div.container_box")
18
- # html_tree_replace_with_children_by_css("div.col.col-12")
19
- # html_tree_replace_with_children_by_css(".tabledatatext, .tabledatatextY")
20
- # html_tree_replace_with_children_by_css("div.row")
1
+ module Coradoc
2
+ module Input
3
+ module Html
4
+ class Plugin
5
+ # This plugin enhances documents from the PLATEAU project
6
+ # to extract more data.
21
7
  #
22
- # We can remove that, but it messes up the images and paragraphs.
23
-
24
- # Remove side menu, so we can generate TOC ourselves
25
- html_tree_remove_by_css(".sideMenu")
26
-
27
- # Correct non-semantic classes into semantic HTML tags
28
- html_tree_change_tag_name_by_css(".titledata", "h1")
29
- html_tree_change_tag_name_by_css(".subtitledata", "h2")
30
- html_tree_change_tag_name_by_css(".pitemdata", "h3")
31
- html_tree_change_tag_name_by_css(".sitemdata", "h4")
32
- html_tree_change_tag_name_by_css('td[bgcolor="#D0CECE"]', "th")
33
- html_tree_change_tag_name_by_css('td[bgcolor="#d0cece"]', "th")
34
- html_tree_change_tag_name_by_css('.framedata, .frame_container_box', 'aside')
35
- html_tree_change_tag_name_by_css('.frame2data', 'pre')
36
- # Assumption that all code snippets in those documents are XML...
37
- html_tree_change_properties_by_css(".frame2data", class: "brush:xml;")
38
-
39
- # Remove some CSS ids that are not important to us
40
- html_tree_change_properties_by_css("#__nuxt", id: nil)
41
- html_tree_change_properties_by_css("#__layout", id: nil)
42
- html_tree_change_properties_by_css("#app", id: nil)
43
-
44
- # Handle lists of document 02
45
- html_tree_replace_with_children_by_css(".list_num-wrap")
46
-
47
- # Convert table/img caption to become a caption
48
- html_tree.css(".imagedata").each do |e|
49
- table = e.parent.next&.children&.first
50
- if table&.name == "table"
51
- e.name = "caption"
52
- table.prepend_child(e)
53
- next
54
- end
55
-
56
- img = e.parent.previous&.children&.first
57
- if img&.name == "img" && img["src"]
58
- title = e.text.strip
59
- img["title"] = title
60
- e.remove
61
- next
62
- end
63
- end
64
-
65
- # Add hooks for H1, H2, H3, H4
66
- html_tree_add_hook_post_by_css("h1, h2, h3", &method(:handle_headers))
67
- html_tree_add_hook_post_by_css("h4", &method(:handle_headers_h4))
68
-
69
- # Table cells aligned to center
70
- html_tree_change_properties_by_css(".tableTopCenter", align: "center")
71
-
72
- # Handle non-semantic lists and indentation
73
- html_tree_add_hook_pre_by_css ".text2data" do |node,|
74
- text = html_tree_process_to_adoc(node).strip
75
- next "" if text.empty? || text == "\u3000"
76
-
77
- if text.start_with?(/\d+\./)
78
- text = text.sub(/\A\d+.\s*/, "")
79
- ".. #{text}\n"
80
- else
81
- text = text.gsub(/^/, "** ")
82
- "\n\n//-PT2D\n#{text}\n//-ENDPT2D\n\n"
83
- end
84
- end
85
-
86
- (3..4).each do |i|
87
- html_tree_add_hook_pre_by_css ".text#{i}data" do |node,|
88
- text = html_tree_process_to_adoc(node).strip
89
- next "" if text.empty? || text == "\u3000"
90
-
91
- text = text.strip.gsub(/^/, "#{'*' * i} ")
92
- "\n\n//-PT#{i}D\n#{text}\n//-ENDPT#{i}D\n\n"
93
- end
94
- end
95
-
96
- (2..3).each do |i|
97
- html_tree_add_hook_pre_by_css ".text#{i}data_point ul" do |node,|
98
- text = html_tree_process_to_adoc(node.children.first.children).strip
99
-
100
- "#{'*' * i} #{text}\n"
8
+ # Usage:
9
+ # reverse_adoc -rcoradoc/input/html/plugins/plateau
10
+ # --external-images -u raise --output _out/index.adoc index.html
11
+ class Plateau < Plugin
12
+ def name
13
+ "PLATEAU"
101
14
  end
102
- end
103
15
 
104
- (1..20).each do |i|
105
- html_tree_add_hook_pre_by_css ".numtextdata_num .list_num#{i}" do |node,|
106
- text = html_tree_process_to_adoc(node).strip
107
-
108
- "[start=#{i}]\n. #{text}\n"
16
+ def preprocess_html_tree
17
+ # Let's simplify the tree by removing what's extraneous
18
+ # html_tree_remove_by_css("script, style, img.container_imagebox:not([src])")
19
+ # html_tree_replace_with_children_by_css("div.container_box")
20
+ # html_tree_replace_with_children_by_css("div.col.col-12")
21
+ # html_tree_replace_with_children_by_css(".tabledatatext, .tabledatatextY")
22
+ # html_tree_replace_with_children_by_css("div.row")
23
+ #
24
+ # We can remove that, but it messes up the images and paragraphs.
25
+
26
+ # Remove side menu, so we can generate TOC ourselves
27
+ html_tree_remove_by_css(".sideMenu")
28
+
29
+ # Correct non-semantic classes into semantic HTML tags
30
+ html_tree_change_tag_name_by_css(".titledata", "h1")
31
+ html_tree_change_tag_name_by_css(".subtitledata", "h2")
32
+ html_tree_change_tag_name_by_css(".pitemdata", "h3")
33
+ html_tree_change_tag_name_by_css(".sitemdata", "h4")
34
+ html_tree_change_tag_name_by_css('td[bgcolor="#D0CECE"]', "th")
35
+ html_tree_change_tag_name_by_css('td[bgcolor="#d0cece"]', "th")
36
+ html_tree_change_tag_name_by_css(".framedata, .frame_container_box",
37
+ "aside")
38
+ html_tree_change_tag_name_by_css(".frame2data", "pre")
39
+ # Assumption that all code snippets in those documents are XML...
40
+ html_tree_change_properties_by_css(".frame2data",
41
+ class: "brush:xml;")
42
+
43
+ # Remove some CSS ids that are not important to us
44
+ html_tree_change_properties_by_css("#__nuxt", id: nil)
45
+ html_tree_change_properties_by_css("#__layout", id: nil)
46
+ html_tree_change_properties_by_css("#app", id: nil)
47
+
48
+ # Handle lists of document 02
49
+ html_tree_replace_with_children_by_css(".list_num-wrap")
50
+
51
+ # Convert table/img caption to become a caption
52
+ html_tree.css(".imagedata").each do |e|
53
+ table = e.parent.next&.children&.first
54
+ if table&.name == "table"
55
+ e.name = "caption"
56
+ table.prepend_child(e)
57
+ next
58
+ end
59
+
60
+ img = e.parent.previous&.children&.first
61
+ if img&.name == "img" && img["src"]
62
+ title = e.text.strip
63
+ img["title"] = title
64
+ e.remove
65
+ next
66
+ end
67
+ end
68
+
69
+ # Add hooks for H1, H2, H3, H4
70
+ html_tree_add_hook_post_by_css("h1, h2, h3",
71
+ &method(:handle_headers))
72
+ html_tree_add_hook_post_by_css("h4", &method(:handle_headers_h4))
73
+
74
+ # Table cells aligned to center
75
+ html_tree_change_properties_by_css(".tableTopCenter",
76
+ align: "center")
77
+
78
+ # Handle non-semantic lists and indentation
79
+ html_tree_add_hook_pre_by_css ".text2data" do |node,|
80
+ text = html_tree_process_to_adoc(node).strip
81
+ next "" if text.empty? || text == "\u3000"
82
+
83
+ if text.start_with?(/\d+\./)
84
+ text = text.sub(/\A\d+.\s*/, "")
85
+ ".. #{text}\n"
86
+ else
87
+ text = text.gsub(/^/, "** ")
88
+ "\n\n//-PT2D\n#{text}\n//-ENDPT2D\n\n"
89
+ end
90
+ end
91
+
92
+ (3..4).each do |i|
93
+ html_tree_add_hook_pre_by_css ".text#{i}data" do |node,|
94
+ text = html_tree_process_to_adoc(node).strip
95
+ next "" if text.empty? || text == "\u3000"
96
+
97
+ text = text.strip.gsub(/^/, "#{'*' * i} ")
98
+ "\n\n//-PT#{i}D\n#{text}\n//-ENDPT#{i}D\n\n"
99
+ end
100
+ end
101
+
102
+ (2..3).each do |i|
103
+ html_tree_add_hook_pre_by_css ".text#{i}data_point ul" do |node,|
104
+ text = html_tree_process_to_adoc(node.children.first.children).strip
105
+
106
+ "#{'*' * i} #{text}\n"
107
+ end
108
+ end
109
+
110
+ (1..20).each do |i|
111
+ html_tree_add_hook_pre_by_css ".numtextdata_num .list_num#{i}" do |node,|
112
+ text = html_tree_process_to_adoc(node).strip
113
+
114
+ "[start=#{i}]\n. #{text}\n"
115
+ end
116
+ end
117
+
118
+ # html_tree_preview
109
119
  end
110
- end
111
-
112
- # html_tree_preview
113
- end
114
120
 
115
- IM = /[A-Z0-9]{1,3}/
116
-
117
- def handle_headers(node, coradoc, state)
118
- content = coradoc.content.map(&:content).join
119
-
120
- if %w[toc0 toc_0].any? { |i| coradoc.id&.start_with?(i) }
121
- # Special content
122
- case content.strip
123
- when "はじめに" # Introduction
124
- coradoc.style = "abstract" # The older version document has ".preface"
125
- coradoc.level_int = 1
126
- when "改定の概要" # Revision overview
127
- coradoc.style = "abstract" # The older version document has ".preface"
128
- coradoc.level_int = 1
129
- when "参考文献" # Bibliography
130
- coradoc.style = "bibliography"
131
- coradoc.level_int = 1
132
- when "改訂履歴" # Document history
133
- coradoc.style = "appendix"
134
- coradoc.level_int = 1
135
- when "0 概要" # Overview
136
- coradoc.style = "abstract" # I'm not sure this is correct
137
- coradoc.level_int = 1
138
- when "索引" # Index
139
- coradoc.style = "index" # I'm not sure this is correct
140
- coradoc.level_int = 1
141
- else
142
- warn "Unknown section #{content.inspect}"
121
+ IM = /[A-Z0-9]{1,3}/
122
+
123
+ def handle_headers(node, coradoc, _state)
124
+ content = coradoc.content.map(&:content).join
125
+
126
+ if %w[toc0 toc_0].any? { |i| coradoc.id&.start_with?(i) }
127
+ # Special content
128
+ case content.strip
129
+ when "はじめに" # Introduction
130
+ coradoc.style = "abstract" # The older version document has ".preface"
131
+ coradoc.level_int = 1
132
+ when "改定の概要" # Revision overview
133
+ coradoc.style = "abstract" # The older version document has ".preface"
134
+ coradoc.level_int = 1
135
+ when "参考文献" # Bibliography
136
+ coradoc.style = "bibliography"
137
+ coradoc.level_int = 1
138
+ when "改訂履歴" # Document history
139
+ coradoc.style = "appendix"
140
+ coradoc.level_int = 1
141
+ when "0 概要" # Overview
142
+ coradoc.style = "abstract" # I'm not sure this is correct
143
+ coradoc.level_int = 1
144
+ when "索引" # Index
145
+ coradoc.style = "index" # I'm not sure this is correct
146
+ coradoc.level_int = 1
147
+ else
148
+ warn "Unknown section #{content.inspect}"
149
+ end
150
+ end
151
+
152
+ if node.name == "h1" && content.start_with?("Annex")
153
+ coradoc.style = "appendix"
154
+ coradoc.content.first.content.sub!(/\AAnnex [A-Z]/, "")
155
+ end
156
+
157
+ # Remove numbers
158
+ coradoc.content.first.content.sub!(/\A(#{IM}\.)*#{IM}[[:space:]]/o,
159
+ "")
160
+
161
+ coradoc
143
162
  end
144
- end
145
163
 
146
- if node.name == "h1"
147
- if content.start_with?("Annex")
148
- coradoc.style = "appendix"
149
- coradoc.content.first.content.sub!(/\AAnnex [A-Z]/, "")
164
+ def handle_headers_h4(_node, coradoc, _state)
165
+ title = Coradoc.strip_unicode(coradoc.content.first.content)
166
+ case title
167
+ when /\A\(\d+\)(.*)/
168
+ coradoc.level_int = 4
169
+ coradoc.content.first.content = $1.strip
170
+ coradoc
171
+ when /\A\d+\)(.*)/
172
+ coradoc.level_int = 5
173
+ coradoc.content.first.content = $1.strip
174
+ coradoc
175
+ when /\A#{IM}\.#{IM}\.#{IM}\.#{IM}(.*)/o
176
+ coradoc.level_int = 4
177
+ coradoc.content.first.content = $1.strip
178
+ else
179
+ if title.empty?
180
+ # Strip instances of faulty empty paragraphs
181
+ nil
182
+ else
183
+ ["// FIXME\n", coradoc]
184
+ end
185
+ end
150
186
  end
151
- end
152
187
 
153
- # Remove numbers
154
- coradoc.content.first.content.sub!(/\A(#{IM}\.)*#{IM}[[:space:]]/, "")
155
-
156
- coradoc
157
- end
158
-
159
- def handle_headers_h4(node, coradoc, state)
160
- title = Coradoc.strip_unicode(coradoc.content.first.content)
161
- case title
162
- when /\A\(\d+\)(.*)/
163
- coradoc.level_int = 4
164
- coradoc.content.first.content = $1.strip
165
- coradoc
166
- when /\A\d+\)(.*)/
167
- coradoc.level_int = 5
168
- coradoc.content.first.content = $1.strip
169
- coradoc
170
- when /\A#{IM}\.#{IM}\.#{IM}\.#{IM}(.*)/
171
- coradoc.level_int = 4
172
- coradoc.content.first.content = $1.strip
173
- else
174
- if title.empty?
175
- # Strip instances of faulty empty paragraphs
176
- nil
177
- else
178
- ["// FIXME\n", coradoc]
188
+ def postprocess_asciidoc_string
189
+ str = asciidoc_string
190
+
191
+ ### Custom indentation handling
192
+ # If there's a step up, add [none]
193
+ str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT3D\s+}, "\n[none]\n")
194
+ str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT4D\s+}, "\n[none]\n")
195
+ str = str.gsub(%r{\s+//-ENDPT3D\s+//-PT4D\s+}, "\n[none]\n")
196
+ # Collapse blocks of text[2,3,4]data
197
+ str = str.gsub(%r{\s+//-ENDPT[234]D\s+//-PT[234]D\s+}, "\n\n")
198
+ # In the beginning, add [none]
199
+ str = str.gsub(%r{\s+//-PT[234]D\s+}, "\n\n[none]\n")
200
+ # If followed by another list, ensure we re-add styling
201
+ str = str.gsub(%r{\s+//-ENDPT[234]D\s+\*}, "\n\n[disc]\n*")
202
+ # Otherwise, clean up
203
+ str = str.gsub(%r{\s+//-ENDPT[234]D\s+}, "\n\n")
204
+
205
+ self.asciidoc_string = str
179
206
  end
180
207
  end
181
208
  end
182
-
183
- def postprocess_asciidoc_string
184
- str = self.asciidoc_string
185
-
186
- ### Custom indentation handling
187
- # If there's a step up, add [none]
188
- str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT3D\s+}, "\n[none]\n")
189
- str = str.gsub(%r{\s+//-ENDPT2D\s+//-PT4D\s+}, "\n[none]\n")
190
- str = str.gsub(%r{\s+//-ENDPT3D\s+//-PT4D\s+}, "\n[none]\n")
191
- # Collapse blocks of text[2,3]data
192
- str = str.gsub(%r{\s+//-ENDPT[234]D\s+//-PT[234]D\s+}, "\n\n")
193
- # In the beginning, add [none]
194
- str = str.gsub(%r{\s+//-PT[234]D\s+}, "\n\n[none]\n")
195
- # If following with another list, ensure we readd styling
196
- str = str.gsub(%r{\s+//-ENDPT[234]D\s+\*}, "\n\n[disc]\n*")
197
- # Otherwise, clean up
198
- str = str.gsub(%r{\s+//-ENDPT[234]D\s+}, "\n\n")
199
-
200
- self.asciidoc_string = str
201
- end
202
209
  end
203
210
  end
204
211
  end
205
212
 
206
- Coradoc::Input::HTML.config.plugins << Coradoc::Input::HTML::Plugin::Plateau
213
+ Coradoc::Input::Html.config.plugins << Coradoc::Input::Html::Plugin::Plateau