chem_scanner 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +3 -0
- data/.rubocop.yml +604 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +20 -0
- data/LICENSE.txt +661 -0
- data/README.md +177 -0
- data/Rakefile +8 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/chem_scanner.gemspec +43 -0
- data/lib/chem_scanner.rb +79 -0
- data/lib/chem_scanner/cdx.rb +67 -0
- data/lib/chem_scanner/cdxml.rb +72 -0
- data/lib/chem_scanner/chem_draw/cdx_reader.rb +101 -0
- data/lib/chem_scanner/chem_draw/node/base_node.rb +123 -0
- data/lib/chem_scanner/chem_draw/node/base_value.rb +257 -0
- data/lib/chem_scanner/chem_draw/node/bond.rb +100 -0
- data/lib/chem_scanner/chem_draw/node/bracket_attachment.rb +17 -0
- data/lib/chem_scanner/chem_draw/node/bracket_group.rb +32 -0
- data/lib/chem_scanner/chem_draw/node/chem_geometry.rb +58 -0
- data/lib/chem_scanner/chem_draw/node/color_table.rb +46 -0
- data/lib/chem_scanner/chem_draw/node/font_table.rb +54 -0
- data/lib/chem_scanner/chem_draw/node/fragment.rb +149 -0
- data/lib/chem_scanner/chem_draw/node/fragment_node.rb +145 -0
- data/lib/chem_scanner/chem_draw/node/graphic.rb +94 -0
- data/lib/chem_scanner/chem_draw/node/text.rb +242 -0
- data/lib/chem_scanner/chem_draw/parser.rb +214 -0
- data/lib/chem_scanner/chem_draw/yaml/cdx_objects.yaml +32 -0
- data/lib/chem_scanner/chem_draw/yaml/cdx_props.yaml +263 -0
- data/lib/chem_scanner/chem_draw/yaml/cdxml_objects.yaml +36 -0
- data/lib/chem_scanner/chem_draw/yaml/cdxml_props.yaml +263 -0
- data/lib/chem_scanner/chem_draw/yaml/props_data_type.yaml +263 -0
- data/lib/chem_scanner/configuration/abbreviation.rb +76 -0
- data/lib/chem_scanner/configuration/superatom.rb +76 -0
- data/lib/chem_scanner/configuration/superatom.txt +2874 -0
- data/lib/chem_scanner/configuration/util.rb +40 -0
- data/lib/chem_scanner/configuration/yaml/abbreviations.yaml +6399 -0
- data/lib/chem_scanner/configuration/yaml/elements.yaml +115 -0
- data/lib/chem_scanner/configuration/yaml/solvents.yaml +16 -0
- data/lib/chem_scanner/doc.rb +56 -0
- data/lib/chem_scanner/docx.rb +86 -0
- data/lib/chem_scanner/export/cml.rb +176 -0
- data/lib/chem_scanner/extension/element_map.rb +9 -0
- data/lib/chem_scanner/extension/geometry/bounding_box.rb +84 -0
- data/lib/chem_scanner/extension/geometry/line.rb +123 -0
- data/lib/chem_scanner/extension/geometry/point.rb +18 -0
- data/lib/chem_scanner/extension/geometry/polygon.rb +115 -0
- data/lib/chem_scanner/extension/geometry/segment.rb +196 -0
- data/lib/chem_scanner/extension/passthrough.rb +7 -0
- data/lib/chem_scanner/interpreter/element/arrow.rb +298 -0
- data/lib/chem_scanner/interpreter/element/atom.rb +134 -0
- data/lib/chem_scanner/interpreter/element/fragment.rb +59 -0
- data/lib/chem_scanner/interpreter/element/molecule.rb +473 -0
- data/lib/chem_scanner/interpreter/element/molecule_group.rb +34 -0
- data/lib/chem_scanner/interpreter/element/reaction.rb +186 -0
- data/lib/chem_scanner/interpreter/element/reaction_step.rb +39 -0
- data/lib/chem_scanner/interpreter/formula_to_mol.rb +75 -0
- data/lib/chem_scanner/interpreter/post_process/assemble.rb +38 -0
- data/lib/chem_scanner/interpreter/post_process/label_by_molecule.rb +37 -0
- data/lib/chem_scanner/interpreter/post_process/reaction_info.rb +225 -0
- data/lib/chem_scanner/interpreter/post_process/reaction_step.rb +95 -0
- data/lib/chem_scanner/interpreter/post_process/reagent_label.rb +46 -0
- data/lib/chem_scanner/interpreter/post_process/text_as_molecule.rb +52 -0
- data/lib/chem_scanner/interpreter/post_process/text_label.rb +40 -0
- data/lib/chem_scanner/interpreter/pre_process/arrow.rb +197 -0
- data/lib/chem_scanner/interpreter/pre_process/graphic.rb +41 -0
- data/lib/chem_scanner/interpreter/pre_process/molecule.rb +150 -0
- data/lib/chem_scanner/interpreter/reaction_detection/assign_to_reaction.rb +129 -0
- data/lib/chem_scanner/interpreter/reaction_detection/duplicate_reagents.rb +50 -0
- data/lib/chem_scanner/interpreter/reaction_detection/molecule_group.rb +55 -0
- data/lib/chem_scanner/interpreter/reaction_detection/multi_line_chain_reaction.rb +85 -0
- data/lib/chem_scanner/interpreter/reaction_detection/remove_separated_mol.rb +115 -0
- data/lib/chem_scanner/interpreter/reaction_detection/text_assignment.rb +166 -0
- data/lib/chem_scanner/interpreter/scheme.rb +173 -0
- data/lib/chem_scanner/interpreter/scheme_base.rb +64 -0
- data/lib/chem_scanner/interpreter/text_group/bold_groups.rb +183 -0
- data/lib/chem_scanner/interpreter/text_group/molecule_text_group.rb +138 -0
- data/lib/chem_scanner/interpreter/text_group/reaction_text_groups.rb +221 -0
- data/lib/chem_scanner/interpreter/text_group/retrieve_alias_info.rb +41 -0
- data/lib/chem_scanner/interpreter/text_group/retrieve_n_atoms.rb +106 -0
- data/lib/chem_scanner/interpreter/text_group/text_group_interpreter.rb +92 -0
- data/lib/chem_scanner/perkin_eln.rb +287 -0
- data/lib/chem_scanner/version.rb +5 -0
- data/lib/rubygems_plugin.rb +5 -0
- metadata +244 -0
@@ -0,0 +1,145 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ChemScanner
|
4
|
+
module ChemDraw
|
5
|
+
ALIAS_VALUES = [0, 4, 5, 8, 12].freeze
|
6
|
+
|
7
|
+
# CDX Node parser
|
8
|
+
class FragmentNode < BaseNode
|
9
|
+
require "chem_scanner/chem_draw/node/fragment"
|
10
|
+
require "chem_scanner/chem_draw/node/text"
|
11
|
+
|
12
|
+
attr_accessor :num_hydrogens, :atnum, :spin, :charge, :iso, :type,
|
13
|
+
:ext_type, :x, :y, :is_alias, :alias_text,
|
14
|
+
:warning, :warning_data, :fragment, :nested_fragment,
|
15
|
+
:nested_text, :color, :expanded, :point, :is_polymer
|
16
|
+
|
17
|
+
# Creates a node and resets every attribute tracked by this class to its
# initial value; parse_node later overwrites whatever the document provides.
#
# All three arguments are forwarded unchanged to the superclass initializer.
def initialize(parser, parser_type, id)
  super(parser, parser_type, id)

  # Integer-valued properties.
  @num_hydrogens = @atnum = @type = @ext_type = -1
  @spin = @charge = @iso = @color = 0

  # Flags.
  @warning = @is_alias = @expanded = @is_polymer = false

  # Text-valued properties.
  @alias_text = ""
  @warning_data = ""

  # Child objects collected while parsing, keyed by their id.
  @nested_fragment = {}
  @nested_text = {}
end
|
38
|
+
|
39
|
+
# rubocop:disable Metrics/PerceivedComplexity

# Stores one property or child object of this node.
#
# The tag is translated to a name through @props_ref first and @obj_ref
# second; names without a branch below are handed to do_unhandled.
#
# @param tag  key looked up in @props_ref / @obj_ref
# @param nid  id given to nested Fragment / Text objects
# @param data raw payload passed on to the reader helpers
def parse_node(tag, nid, data)
  ref = @props_ref[tag]
  ref = @obj_ref[tag] if ref.nil?

  case ref
  when "Node_Element" then @atnum = read_value(tag, data)
  when "Atom_Radical" then @spin = read_value(tag, data)
  when "Atom_Isotope" then @iso = read_value(tag, data)
  when "Atom_Charge" then @charge = read_value(tag, data)
  when "Atom_NumHydrogens" then @num_hydrogens = read_value(tag, data)
  when "ForegroundColor" then @color = read_value(tag, data)
  when "2DPosition" then @x, @y = read_value(tag, data)
  when "Fragment"
    frag = Fragment.new(@parser, @parser_type, nid)
    frag.read
    @nested_fragment[nid] = frag
  when "Atom_GenericNickname"
    # The text reader is chosen by parser type ("<parser_type>_text").
    nickname = send("#{@parser_type}_text", data)
    @generic_nickname = nickname.first[:text] unless nickname.empty?
  when "Node_Type"
    @type = read_type(tag, data, CDXML_NODE_TYPE)
    @is_alias = ALIAS_VALUES.include?(@type)
  when "Text"
    @text = Text.new(@parser, @parser_type, nid, true)
    # NOTE: MUST read first in order to maintain CDX reader
    @text.read
    @polygon = @text.polygon

    @nested_text[@text.id] = @text
  when "ChemicalWarning"
    @warning = true
    # For "cdxml" the payload exposes #text; otherwise it is stored as is.
    @warning_data = @parser_type == "cdxml" ? data.text : data
  when "Atom_ExternalConnectionType"
    @ext_type = read_type(tag, data, CDXML_ATOM_EXTERNAL_CONNECTION_TYPE)
  else do_unhandled(tag)
  end
end
# rubocop:enable Metrics/PerceivedComplexity
|
78
|
+
|
79
|
+
# Finalises the node once all of its properties have been read.
#
# Builds @point from @x/@y, then picks the alias text: a non-empty nested
# text wins; otherwise a generic nickname accepted by
# ChemScanner::Interpreter.rgroup_atom? turns the node into an alias.
def post_parse_node
  @point = Geometry::Point.new(@x, @y)

  unless @text.nil? || @text.value.empty?
    @alias_text = @text.value
    return
  end

  rgroup_checker = ChemScanner::Interpreter
  return if @generic_nickname.nil?
  return unless rgroup_checker.rgroup_atom?(@generic_nickname)

  @is_alias = true
  # NOTE(review): 7 is presumably the generic-nickname node type - confirm
  # against the node type table.
  @type = 7
  @alias_text = @generic_nickname
end
|
95
|
+
|
96
|
+
# Left-bottom corner of the node: taken from the polygon's bounding box when
# a polygon is present, otherwise the node's own point.
def leftbottom
  return point if @polygon.nil?

  @polygon.bounding_box.leftbottom
end
|
99
|
+
|
100
|
+
# Right-top corner of the node: taken from the polygon's bounding box when a
# polygon is present, otherwise the node's own point.
def righttop
  return point if @polygon.nil?

  @polygon.bounding_box.righttop
end
|
103
|
+
|
104
|
+
# True when the node has no usable location: no polygon and at least one of
# @x / @y missing.
def has_nil_coord?
  return false unless @polygon.nil?

  @x.nil? || @y.nil?
end
|
107
|
+
|
108
|
+
# Overwrites the node type with the given value.
#
# @param type new value stored in @type
def set_type(type)
  @type = type
end
|
111
|
+
|
112
|
+
# Flags the node as expanded.
def set_expanded
  @expanded = true
end
|
115
|
+
|
116
|
+
# Flags the node as a polymer; a polymer node is also flagged as an alias.
def set_is_polymer
  @is_alias = true
  @is_polymer = true
end
|
120
|
+
|
121
|
+
# Builds a new node of the same class (created with a nil id) and copies the
# chemistry attributes listed below onto it.
#
# The nested fragment/text tables are copied into fresh hashes, so adding to
# the copy's tables does not touch this node; the stored objects themselves
# are shared. Attributes not listed here (for example the coordinates) keep
# the values the initializer gave them.
def clone
  copy = self.class.new(@parser, @parser_type, nil)

  %i[
    num_hydrogens atnum spin charge iso color type
    alias_text warning warning_data is_alias expanded
  ].each do |attr|
    copy.public_send("#{attr}=", instance_variable_get("@#{attr}"))
  end

  copy.nested_fragment = @nested_fragment.each_with_object({}) do |(key, frag), table|
    table[key] = frag
  end
  copy.nested_text = @nested_text.each_with_object({}) do |(key, text), table|
    table[key] = text
  end

  copy
end
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ChemScanner
|
4
|
+
module ChemDraw
|
5
|
+
# CDX Graphic parser
|
6
|
+
class Graphic < BaseNode
|
7
|
+
attr_reader :arrow_id, :type, :arrow_head, :head, :tail,
|
8
|
+
:line_type, :orbital_type, :oval_type, :polygon
|
9
|
+
|
10
|
+
GRAPHIC_BRACKET_TYPE = 6
|
11
|
+
|
12
|
+
# Creates a graphic node; the line type starts at 0 until a "Line_Type"
# property overrides it.
#
# All three arguments are forwarded unchanged to the superclass initializer.
def initialize(parser, parser_type, id)
  super(parser, parser_type, id)
  @line_type = 0
end
|
17
|
+
|
18
|
+
# Stores one property of the graphic. The tag is translated to a property
# name through @props_ref; unknown names go to do_unhandled.
#
# @param tag  key looked up in @props_ref
# @param _id  unused
# @param data raw payload passed on to the reader helpers
def parse_node(tag, _id, data)
  property = @props_ref[tag]

  case property
  when "Arrow_Type"
    @arrow_head = read_type(tag, data, CDXML_ARROW_TYPE)
  when "Line_Type"
    @line_type = read_type(tag, data, CDXML_LINE_TYPE)
  when "Graphic_Type"
    @type = read_type(tag, data, CDXML_GRAPHIC_TYPE)
  when "Orbital_Type"
    @orbital_type = read_type(tag, data, CDXML_ORBITAL_TYPE)
  when "Oval_Type"
    @oval_type = read_type(tag, data, CDXML_OVAL_TYPE)
  when "BoundingBox"
    # Graphic objects are the only objects whose kCDXProp_BoundingBox
    # property has a special meaning, representing a pair of points
    # rather than a rectangle.
    @polygon = read_value(tag, data)
  when "SupersededBy"
    @arrow_id = read_value(tag, data)
  when "3DMajorAxisEnd"
    @right, @top = read_value(tag, data)
  when "3DMinorAxisEnd"
    @left, @bottom = read_value(tag, data)
  else
    do_unhandled(tag)
  end
end
|
40
|
+
|
41
|
+
# Finalises the graphic once all of its properties have been read.
def post_parse_node
  # When dealing with orbital, boundingbox is not reliable
  build_orbital_polygon if @type == 5

  # Treat the graphic as an arrow only if it is a line (type 1), has no
  # "SupersededBy" and does have a "BoundingBox".
  standalone_line = @type == 1 && @arrow_id.nil?
  return if !standalone_line || @polygon.nil?

  corners = @polygon.vertices
  head_point = corners[1] # start point ~ head
  tail_point = corners[3] # end point ~ tail

  @head = { x: head_point.x, y: head_point.y }
  @tail = { x: tail_point.x, y: tail_point.y }
end
|
58
|
+
|
59
|
+
# Replaces @polygon with a four-corner polygon built from @left, @bottom,
# @right and @top, but only when the orbital type is 256 and the oval type
# is 3; does nothing otherwise.
def build_orbital_polygon
  return if @orbital_type != 256 || @oval_type != 3

  corners = [
    [@left, @bottom],
    [@left, @top],
    [@right, @top],
    [@right, @bottom],
  ]
  @polygon = Geometry::Polygon.new(corners.map { |x, y| Geometry::Point.new(x, y) })
end
|
69
|
+
|
70
|
+
# True for a type-1 graphic that has no "SupersededBy" id and whose arrow
# head is either unset or zero.
def line?
  return false unless @type == 1 && @arrow_id.nil?

  @arrow_head.nil? || @arrow_head.zero?
end
|
73
|
+
|
74
|
+
# Segment running from the tail point to the head point.
def segment
  tail_xy = @tail.values_at(:x, :y)
  head_xy = @head.values_at(:x, :y)

  Geometry::Segment.new_by_arrays(tail_xy, head_xy)
end
|
80
|
+
|
81
|
+
# The tail-to-head segment converted with its #to_vector.
def vector
  segment.to_vector
end
|
84
|
+
|
85
|
+
# The tail-to-head segment converted with its #to_line.
def line
  segment.to_line
end
|
88
|
+
|
89
|
+
# Always false for a graphic.
def cross?
  false
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
@@ -0,0 +1,242 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module ChemScanner
|
5
|
+
module ChemDraw
|
6
|
+
# Text parser
|
7
|
+
class Text < BaseNode
|
8
|
+
attr_accessor :warning, :warning_data, :x, :y, :styled_text, :value,
|
9
|
+
:center, :polygon, :bold_text, :non_bold_text
|
10
|
+
|
11
|
+
GREEK_CHARS = {
|
12
|
+
"A" => "Α",
|
13
|
+
"a" => "α",
|
14
|
+
"B" => "Β",
|
15
|
+
"b" => "β",
|
16
|
+
"G" => "Γ",
|
17
|
+
"g" => "γ",
|
18
|
+
"D" => "Δ",
|
19
|
+
"d" => "δ",
|
20
|
+
"E" => "Ε",
|
21
|
+
"e" => "ε",
|
22
|
+
"Z" => "Ζ",
|
23
|
+
"z" => "ζ",
|
24
|
+
"H" => "Η",
|
25
|
+
"h" => "η",
|
26
|
+
"Q" => "Θ",
|
27
|
+
"q" => "θ",
|
28
|
+
"I" => "Ι",
|
29
|
+
"i" => "ι",
|
30
|
+
"K" => "Κ",
|
31
|
+
"k" => "κ",
|
32
|
+
"L" => "Λ",
|
33
|
+
"l" => "λ",
|
34
|
+
"M" => "Μ",
|
35
|
+
"m" => "μ",
|
36
|
+
"N" => "Ν",
|
37
|
+
"n" => "ν",
|
38
|
+
"C" => "Ξ",
|
39
|
+
"c" => "ξ",
|
40
|
+
"O" => "Ο",
|
41
|
+
"o" => "ο",
|
42
|
+
"P" => "Π",
|
43
|
+
"p" => "π",
|
44
|
+
"R" => "Ρ",
|
45
|
+
"r" => "ρ",
|
46
|
+
"S" => "Σ",
|
47
|
+
"s" => "σ",
|
48
|
+
"T" => "Τ",
|
49
|
+
"t" => "τ",
|
50
|
+
"U" => "Υ",
|
51
|
+
"u" => "υ",
|
52
|
+
"F" => "Φ",
|
53
|
+
"f" => "φ",
|
54
|
+
"X" => "Χ",
|
55
|
+
"x" => "χ",
|
56
|
+
"Y" => "Ψ",
|
57
|
+
"y" => "ψ",
|
58
|
+
"W" => "Ω",
|
59
|
+
"w" => "ω",
|
60
|
+
}.freeze
|
61
|
+
|
62
|
+
BOLD_VAL = 0x01
|
63
|
+
FONT_KEY = "face"
|
64
|
+
COLOR_KEY = "color"
|
65
|
+
|
66
|
+
# Creates a text node with empty text values and no warning.
#
# The first three arguments are forwarded unchanged to the superclass
# initializer.
#
# @param is_alias stored in @is_alias (false by default)
def initialize(parser, parser_type, id, is_alias = false)
  super(parser, parser_type, id)

  @is_alias = is_alias
  @warning = false

  @value = ""
  @bold_text = ""
end
|
75
|
+
|
76
|
+
# Stores one property of the text node. The tag is translated to a property
# name through @props_ref; unknown names go to do_unhandled.
#
# NOTE: CDXML text does not have tag, so the "Text" branch below only
# happens for CDX.
#
# @param tag  key looked up in @props_ref
# @param _id  unused
# @param data raw payload passed on to the reader helpers
def parse_node(tag, _id, data)
  property = @props_ref[tag]

  case property
  when "Text"
    @styled_text = cdx_text(data)
  when "2DPosition"
    @x, @y = read_value(tag, data)
  when "BoundingBox"
    @polygon = read_value(tag, data)
  when "ChemicalWarning"
    @warning = true
    @warning_data = data
  else
    do_unhandled(tag)
  end
end
|
89
|
+
|
90
|
+
# For every parser type except "cdx", reads the styled text from the
# parser's reader before the node's properties are parsed.
def pre_parse_node
  @styled_text = cdxml_text(@parser.reader) unless @parser_type == "cdx"
end
|
95
|
+
|
96
|
+
# Finalises the text node: normalises the styled runs, derives the
# bold / non-bold / full text values, then builds @center from @x and @y.
def post_parse_node
  process_style
  retrieve_bold_text
  @center = Geometry::Point.new(@x, @y)
end
|
102
|
+
|
103
|
+
# Drops every styled run whose face has bit 0 set, then re-runs
# process_style on what is left.
def remove_bold
  @styled_text.reject! { |run| (run[:face] & 1) == 1 }
  process_style
end
|
107
|
+
|
108
|
+
# Concatenates the text of all styled runs, wrapping runs flagged :bold in
# "**" markers.
def markdown
  pieces = @styled_text.map do |run|
    run[:bold] ? "**#{run[:text]}**" : run[:text]
  end

  pieces.inject("", :+)
end
|
114
|
+
|
115
|
+
# The styled runs whose :bold flag is set, in their original order.
def bolded_styles
  @styled_text.select { |run| run[:bold] }
end
|
118
|
+
|
119
|
+
private
|
120
|
+
|
121
|
+
# Normalises every styled run in place and then consolidates bold runs.
#
# For each run: the text is converted with to_unicode, "\r\n" / "\r" become
# "\n", and :position / :length are recorded from the running character
# count. Non-bold runs in the "Symbol" font get their Latin letters mapped
# through GREEK_CHARS (plus a trailing space). A lone "_" with face 64 and
# any en dash become "-". Finally :bold is derived from bit 0 of the face.
#
# The font lookup only gates the Greek conversion: a run whose font id is
# missing from the parser's font table still gets its dash handling and its
# :bold flag.
def process_style
  greek_pattern = Regexp.union(GREEK_CHARS.keys)
  pos_cur = 0

  @styled_text.each do |style|
    style[:text] = to_unicode(style[:text]).gsub(/\r\n?/, "\n")

    # Position and length are measured before the Symbol-font conversion
    # below appends its trailing space.
    style[:position] = pos_cur
    style[:length] = style[:text].size
    pos_cur += style[:length]

    font = @parser.font_table.find { |f| f[:id] == style[:font] }
    if font && font[:name] == "Symbol" && (style[:face] & 1) != 1
      style[:text] = style[:text].gsub(greek_pattern, GREEK_CHARS) + " "
    end

    # User use superscript "_" as minus
    style[:text] = "-" if style[:face] == 64 && style[:text] == "_"
    style[:text] = style[:text].gsub("–", "-")

    style[:bold] = (style[:face] & 1) == 1
  end

  # If "3-6" bold, "-" is originally not BOLD. Same for bolded "2a,b"
  # Set bold for single "middle" character
  set_special_bold

  # Merge previous continuous bold text
  merge_bold
end
|
155
|
+
|
156
|
+
# Flags a single-character run as bold when it sits directly between two
# bold runs (e.g. the "-" of a bold "3-6" or the "," of a bold "2a,b").
#
# A run at idx - 1 is promoted when the runs at idx - 2 and idx are both
# bold, the run at idx starts exactly where the middle run ends, and the
# middle run's stripped text is one character long.
def set_special_bold
  return if @styled_text.count < 2

  last_bold = nil
  @styled_text.each_with_index do |style, idx|
    next unless style[:bold]

    earlier_bold = last_bold
    last_bold = idx
    next if idx.zero? || earlier_bold != idx - 2

    middle = @styled_text[idx - 1]
    adjacent = style[:position] == middle[:position] + middle[:length]
    middle[:bold] = true if adjacent && middle[:text].strip.length == 1
  end
end
|
178
|
+
|
179
|
+
# Collapses each run of consecutive bold entries into its first entry: the
# texts are appended in order and the now-redundant entries are removed.
# Only :text of the surviving entry is updated.
def merge_bold
  bold_ids = @styled_text.each_index.select { |idx| @styled_text[idx][:bold] }
  return if bold_ids.empty?

  # Split the ascending indices wherever they stop being consecutive and
  # keep only the groups that actually have something to merge.
  groups = bold_ids.slice_when { |left, right| right != left + 1 }
  groups = groups.select { |ids| ids.size > 1 }

  # Work from the back so that deletions never shift an index still to be
  # visited.
  groups.reverse_each do |ids|
    ids.drop(1).reverse_each do |idx|
      @styled_text[idx - 1][:text] += @styled_text[idx][:text]
      @styled_text.delete_at(idx)
    end
  end
end
|
206
|
+
|
207
|
+
# Returns the text as UTF-8.
#
# A string already tagged UTF-8 is returned as is. Anything else has its
# bytes interpreted as CP1252 and transcoded; bytes that cannot be converted
# are replaced with "??". The conversion works on a copy, so the caller's
# string keeps its encoding and frozen strings are accepted.
#
# @param text [String]
# @return [String]
def to_unicode(text)
  return text if text.encoding == Encoding::UTF_8

  text.dup.force_encoding(Encoding::CP1252).encode(
    Encoding::UTF_8,
    invalid: :replace,
    undef: :replace,
    replace: "??",
  )
end
|
218
|
+
|
219
|
+
# Derives @bold_text, @non_bold_text and @value from the styled runs.
#
# Bold run texts are joined with a space and lose a trailing ",", ":" or "."
# (plus following spaces) at each line end; non-bold run texts and the full
# text are joined without a separator. All three results are stripped, have
# "\r\n" / "\r" line breaks turned into a single "\n", and have U+2219
# replaced by U+00B7.
def retrieve_bold_text
  bold_runs, plain_runs = @styled_text.partition { |s| s[:bold] }

  bold = bold_runs.map { |s| s[:text] }.join(" ").gsub(/[,:\.] *$/, "")
  plain = plain_runs.map { |s| s[:text] }.join
  whole = @styled_text.map { |s| s[:text].to_s }.join

  # NOTE: Replace U+2219 to U+00B7
  normalize = ->(str) { str.strip.gsub(/\r\n?/, "\n").gsub("∙", "·") }

  @bold_text = normalize.call(bold)
  @non_bold_text = normalize.call(plain)
  @value = normalize.call(whole)
end
|
232
|
+
|
233
|
+
# Short debugging representation showing the id, the bold text and the full
# text value.
def inspect
  "#<Text: id=#{@id}, bold: #{@bold_text}, value: #{@value} >"
end
# This method sits in the private section; expose it again so that an
# explicit `text.inspect` call works like it does on any other object.
public :inspect
|
240
|
+
end
|
241
|
+
end
|
242
|
+
end
|