chem_scanner 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +3 -0
- data/.rubocop.yml +604 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +20 -0
- data/LICENSE.txt +661 -0
- data/README.md +177 -0
- data/Rakefile +8 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/chem_scanner.gemspec +43 -0
- data/lib/chem_scanner.rb +79 -0
- data/lib/chem_scanner/cdx.rb +67 -0
- data/lib/chem_scanner/cdxml.rb +72 -0
- data/lib/chem_scanner/chem_draw/cdx_reader.rb +101 -0
- data/lib/chem_scanner/chem_draw/node/base_node.rb +123 -0
- data/lib/chem_scanner/chem_draw/node/base_value.rb +257 -0
- data/lib/chem_scanner/chem_draw/node/bond.rb +100 -0
- data/lib/chem_scanner/chem_draw/node/bracket_attachment.rb +17 -0
- data/lib/chem_scanner/chem_draw/node/bracket_group.rb +32 -0
- data/lib/chem_scanner/chem_draw/node/chem_geometry.rb +58 -0
- data/lib/chem_scanner/chem_draw/node/color_table.rb +46 -0
- data/lib/chem_scanner/chem_draw/node/font_table.rb +54 -0
- data/lib/chem_scanner/chem_draw/node/fragment.rb +149 -0
- data/lib/chem_scanner/chem_draw/node/fragment_node.rb +145 -0
- data/lib/chem_scanner/chem_draw/node/graphic.rb +94 -0
- data/lib/chem_scanner/chem_draw/node/text.rb +242 -0
- data/lib/chem_scanner/chem_draw/parser.rb +214 -0
- data/lib/chem_scanner/chem_draw/yaml/cdx_objects.yaml +32 -0
- data/lib/chem_scanner/chem_draw/yaml/cdx_props.yaml +263 -0
- data/lib/chem_scanner/chem_draw/yaml/cdxml_objects.yaml +36 -0
- data/lib/chem_scanner/chem_draw/yaml/cdxml_props.yaml +263 -0
- data/lib/chem_scanner/chem_draw/yaml/props_data_type.yaml +263 -0
- data/lib/chem_scanner/configuration/abbreviation.rb +76 -0
- data/lib/chem_scanner/configuration/superatom.rb +76 -0
- data/lib/chem_scanner/configuration/superatom.txt +2874 -0
- data/lib/chem_scanner/configuration/util.rb +40 -0
- data/lib/chem_scanner/configuration/yaml/abbreviations.yaml +6399 -0
- data/lib/chem_scanner/configuration/yaml/elements.yaml +115 -0
- data/lib/chem_scanner/configuration/yaml/solvents.yaml +16 -0
- data/lib/chem_scanner/doc.rb +56 -0
- data/lib/chem_scanner/docx.rb +86 -0
- data/lib/chem_scanner/export/cml.rb +176 -0
- data/lib/chem_scanner/extension/element_map.rb +9 -0
- data/lib/chem_scanner/extension/geometry/bounding_box.rb +84 -0
- data/lib/chem_scanner/extension/geometry/line.rb +123 -0
- data/lib/chem_scanner/extension/geometry/point.rb +18 -0
- data/lib/chem_scanner/extension/geometry/polygon.rb +115 -0
- data/lib/chem_scanner/extension/geometry/segment.rb +196 -0
- data/lib/chem_scanner/extension/passthrough.rb +7 -0
- data/lib/chem_scanner/interpreter/element/arrow.rb +298 -0
- data/lib/chem_scanner/interpreter/element/atom.rb +134 -0
- data/lib/chem_scanner/interpreter/element/fragment.rb +59 -0
- data/lib/chem_scanner/interpreter/element/molecule.rb +473 -0
- data/lib/chem_scanner/interpreter/element/molecule_group.rb +34 -0
- data/lib/chem_scanner/interpreter/element/reaction.rb +186 -0
- data/lib/chem_scanner/interpreter/element/reaction_step.rb +39 -0
- data/lib/chem_scanner/interpreter/formula_to_mol.rb +75 -0
- data/lib/chem_scanner/interpreter/post_process/assemble.rb +38 -0
- data/lib/chem_scanner/interpreter/post_process/label_by_molecule.rb +37 -0
- data/lib/chem_scanner/interpreter/post_process/reaction_info.rb +225 -0
- data/lib/chem_scanner/interpreter/post_process/reaction_step.rb +95 -0
- data/lib/chem_scanner/interpreter/post_process/reagent_label.rb +46 -0
- data/lib/chem_scanner/interpreter/post_process/text_as_molecule.rb +52 -0
- data/lib/chem_scanner/interpreter/post_process/text_label.rb +40 -0
- data/lib/chem_scanner/interpreter/pre_process/arrow.rb +197 -0
- data/lib/chem_scanner/interpreter/pre_process/graphic.rb +41 -0
- data/lib/chem_scanner/interpreter/pre_process/molecule.rb +150 -0
- data/lib/chem_scanner/interpreter/reaction_detection/assign_to_reaction.rb +129 -0
- data/lib/chem_scanner/interpreter/reaction_detection/duplicate_reagents.rb +50 -0
- data/lib/chem_scanner/interpreter/reaction_detection/molecule_group.rb +55 -0
- data/lib/chem_scanner/interpreter/reaction_detection/multi_line_chain_reaction.rb +85 -0
- data/lib/chem_scanner/interpreter/reaction_detection/remove_separated_mol.rb +115 -0
- data/lib/chem_scanner/interpreter/reaction_detection/text_assignment.rb +166 -0
- data/lib/chem_scanner/interpreter/scheme.rb +173 -0
- data/lib/chem_scanner/interpreter/scheme_base.rb +64 -0
- data/lib/chem_scanner/interpreter/text_group/bold_groups.rb +183 -0
- data/lib/chem_scanner/interpreter/text_group/molecule_text_group.rb +138 -0
- data/lib/chem_scanner/interpreter/text_group/reaction_text_groups.rb +221 -0
- data/lib/chem_scanner/interpreter/text_group/retrieve_alias_info.rb +41 -0
- data/lib/chem_scanner/interpreter/text_group/retrieve_n_atoms.rb +106 -0
- data/lib/chem_scanner/interpreter/text_group/text_group_interpreter.rb +92 -0
- data/lib/chem_scanner/perkin_eln.rb +287 -0
- data/lib/chem_scanner/version.rb +5 -0
- data/lib/rubygems_plugin.rb +5 -0
- metadata +244 -0
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ChemScanner
|
|
4
|
+
module Interpreter
|
|
5
|
+
using Extension
|
|
6
|
+
|
|
7
|
+
module ReactionDetection
|
|
8
|
+
# Runs step detection (see #detect_reaction_step) on every entry of
# +@reactions+, in iteration order.
#
# @return [Object] whatever +@reactions.each+ returns
def process_reactions_step
  @reactions.each do |reaction|
    detect_reaction_step(reaction)
  end
end
|
|
11
|
+
|
|
12
|
+
# Splits a reaction description that is written as an enumerated list
# ("1. ...", "ii) ...", "(A) ...") into ReactionStep objects and appends
# them to +reaction.steps+.
#
# Nothing happens unless at least two enumerated lines are found and all of
# their markers belong, in order and without repetition, to one of the known
# numbering schemes.
#
# When any detected step carries its own time (or temperature), the
# reaction-level value is reset to "".
#
# @param reaction [#description, #reagent_abbs, #steps, #reagents,
#   #time=, #temperature=] the reaction to inspect; it is mutated in place
# @return [Object, nil] not meaningful to callers
def detect_reaction_step(reaction)
  # Supported numbering schemes, nine markers each. The letter scheme skips
  # "I" (presumably because it would be read as a roman numeral), so "J"
  # is its ninth marker.
  number_ref = [
    %w[1 2 3 4 5 6 7 8 9],
    %w[I II III IV V VI VII VIII IX],
    %w[i ii iii iv v vi vii viii ix],
    %w[A B C D E F G H J],
  ]

  # Capture 2: marker plus text, capture 3: the bare marker, capture 4: the
  # text after the marker. First form: "1. text" / "1) text"; second form:
  # "(1) text".
  regex_list = [
    /(^|\A)(([1-9a-z]{0,3}) *[)\.] *(.*))($|\z)/i,
    /(^|\A)\((([1-9a-z]{0,3}) *\) *(.*))($|\z)/i,
  ]

  list_matched = []
  list_numbered = []
  check = false

  regex_list.each do |regex|
    list_matched = reaction.description.enum_for(:scan, regex).map do
      Regexp.last_match
    end
    list_numbered = list_matched.map { |m| m[3] }
    next if list_numbered.empty?

    # The markers must be an ordered, duplicate-free subset of one scheme.
    check = number_ref.any? { |ref| ref & list_numbered == list_numbered }
    break if check
  end

  return unless check && list_numbered.count >= 2

  flatten_ref = number_ref.flatten
  list_position = list_matched.map { |m| m.begin(0) }
  any_time = false
  any_temperature = false

  list_matched.each_with_index do |matched, idx|
    # A step runs from its own marker up to the character before the next
    # marker, or to the end of the description for the last step.
    following = list_position[idx + 1]
    last_pos = following ? following - 1 : -1
    description = reaction.description[list_position[idx]..last_pos]

    text_start_pos = if matched[4].empty?
                       marker = matched[2]
                       description.index(marker) + marker.size
                     else
                       description.index(matched[4]) || 0
                     end
    description = description[text_start_pos..-1]
    temperature, _, time = extract_reaction_info([description])

    step = ReactionStep.new
    step.temperature = temperature
    step.time = time
    step.description = description
    # Every scheme has nine entries, so the flat index modulo 9 is the
    # zero-based position of the marker within its own scheme.
    step.number = (flatten_ref.index(matched[3]) % 9) + 1

    # Accumulate across steps: one step with a time/temperature is enough.
    any_time ||= !time.empty?
    any_temperature ||= !temperature.empty?

    reaction.reagent_abbs.each do |abb|
      next unless description.include?(abb)

      step.reagents.push(ChemScanner.get_abbreviation(abb))
    end

    reaction.steps.push(step)
  end

  reaction.time = "" if any_time
  reaction.temperature = "" if any_temperature

  # NOTE: temporary workaround - when the reaction has exactly one reagent
  # and exactly one step without text, assign that reagent to the step.
  return if reaction.reagents.count != 1

  empty_steps = reaction.steps.select do |s|
    s.description.empty? || s.description == "\n"
  end
  return if empty_steps.count != 1

  empty_steps.first.reagents.push(reaction.reagents.first.cano_smiles)
end
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ChemScanner
|
|
4
|
+
module Interpreter
|
|
5
|
+
using Extension
|
|
6
|
+
|
|
7
|
+
module PostProcess
|
|
8
|
+
# Re-attaches bold arrow texts to the closest reagent molecule.
#
# For every reaction, each text listed on its arrow is inspected. A text
# with a non-blank bold part that does not equal the label of any reagent
# of the reaction is moved from the reaction/arrow to the reagent whose
# +min_distance_to_point+ to the centre of the text polygon is smallest.
# The reagent text is then rebuilt with +assemble_molecule_text+.
#
# Moves are collected first and applied afterwards so that the arrow's text
# list is not modified while it is being iterated.
#
# @return [Object] whatever +@reactions.each+ returns
def refine_reagents_label
  @reactions.each do |reaction|
    pending = []

    @arrow_map[reaction.arrow_id].text_arr.each do |text_id|
      text = @text_map[text_id]
      bold = text.bold_text
      next if bold.strip.empty?

      labelled = reaction.reagent_ids.find { |id| @mol_map[id].label == bold }
      next unless labelled.nil?

      # 0 means "no reagent found"; real reagent ids are expected to be
      # positive (see the +positive?+ check below).
      nearest_id = 0
      nearest_dist = 9_999_999
      reaction.reagent_ids.each do |rid|
        dist = @mol_map[rid].min_distance_to_point(text.polygon.center)
        next unless dist < nearest_dist

        nearest_id = rid
        nearest_dist = dist
      end

      pending.push(text: text_id, reagent: nearest_id) if nearest_id.positive?
    end

    pending.each do |entry|
      text = @text_map[entry[:text]]
      reaction.text_ids.delete(text.id)
      @arrow_map[reaction.arrow_id].text_arr.delete(text.id)

      reagent = @mol_map[entry[:reagent]]
      reagent.text_ids.push(text.id)
      assemble_molecule_text(reagent)
    end
  end
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
module ChemScanner
|
|
5
|
+
# Interpreter of extracted/scanned information
|
|
6
|
+
module Interpreter
|
|
7
|
+
using Extension
|
|
8
|
+
|
|
9
|
+
module PostProcess
|
|
10
|
+
# Promotes molecule-attached texts that are themselves a known abbreviation
# to molecules of their own.
#
# A text qualifies when
# * some molecule in +@mol_map+ lists it in its +text_ids+,
# * +ChemScanner.get_abbreviation+ returns a non-empty value for it,
# * +detect_position+ places it in the "reagents" group of no reaction, and
# * +detect_position+ places it in the "reactants" or "products" group of
#   at least one reaction.
#
# A qualifying text is detached from its molecule, stored in +@mol_map+
# under the text's own key as +Molecule.new_from_smiles(key, smiles)+,
# added to the matching id list (+reactant_ids+ / +product_ids+) of the
# first reaction that placed it, and finally removed from +@text_map+.
#
# @return [Array] keys that were removed from +@text_map+
def refine_text_as_molecule
  key_to_delete = []

  @text_map.each do |k, text|
    mol = @mol_map.values.detect { |m| m.text_ids.include?(k) }
    next if mol.nil?

    smi = ChemScanner.get_abbreviation(text.value)
    next if smi.empty?

    # arrow id => group name, for every reaction that can place the text
    group_pos = {}
    @reactions.each do |reaction|
      group = detect_position(@arrow_map[reaction.arrow_id], text.polygon)
      group_pos[reaction.arrow_id] = group unless group.nil?
    end

    # A text sitting in a reagent position stays a plain text.
    next if group_pos.value?("reagents")

    target = group_pos.detect { |_, p| %w[reactants products].include?(p) }
    next if target.nil?

    arrow_id, group = target

    key_to_delete.push(k)
    mol.text_ids.delete(k)
    @mol_map[k] = Molecule.new_from_smiles(k, smi)

    # "reactants" -> reactant_ids, "products" -> product_ids
    reaction = @reactions.detect { |r| r.arrow_id == arrow_id }
    reaction.send("#{group.chomp('s')}_ids").push(k)
  end

  # The promoted texts no longer need to live in text_map. Deleting after
  # the loop avoids modifying the hash while it is iterated.
  key_to_delete.each { |k| @text_map.delete(k) }
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
module ChemScanner
|
|
5
|
+
# Interpreter of extracted/scanned information
|
|
6
|
+
module Interpreter
|
|
7
|
+
using Extension
|
|
8
|
+
|
|
9
|
+
module PostProcess
|
|
10
|
+
# text_id could be both on text_map and mol_group_map
|
|
11
|
+
# Text-as-label, e.g. "ligand = ", "amide = "
|
|
12
|
+
# Handles molecules whose text is a label definition, i.e. ends with "="
# (for example "ligand = ").
#
# The label is the molecule text with the trailing "=" and surrounding
# whitespace removed. Every reaction whose arrow carries a text containing
# that label receives the molecule as a reagent. If at least one reaction
# referenced the label, the molecule is also removed from the reactant and
# product lists of all reactions.
#
# Arrow text ids that have no entry in +@text_map+ are ignored.
#
# @return [Hash] the subset of +@mol_map+ that was treated as labels
def refine_text_label
  @mol_map.select { |_, m| m.text.strip.end_with?("=") }.each do |mid, mol|
    label_text = mol.text.strip.chomp("=").strip
    existed = false

    @reactions.each do |r|
      referenced = @arrow_map[r.arrow_id].text_arr.any? do |tid|
        text = @text_map[tid]
        !text.nil? && text.value.include?(label_text)
      end
      # Only reactions that actually mention the label get the reagent.
      next unless referenced

      existed = true
      r.reagent_ids.push(mid) unless r.reagent_ids.include?(mid)
    end

    next unless existed

    @reactions.each do |r|
      %w[reactant product].each do |group|
        r.send("#{group}_ids").delete(mid)
      end
    end
  end
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ChemScanner
|
|
4
|
+
module Interpreter
|
|
5
|
+
using Extension
|
|
6
|
+
|
|
7
|
+
ESTIMATED_DIST = 0.2
|
|
8
|
+
|
|
9
|
+
module PreProcess
|
|
10
|
+
# - Detect cross arrow from line map
|
|
11
|
+
# - Attach "extend" line to arrow
|
|
12
|
+
# Builds +@arrow_map+ and +@segment_map+ from the scanned geometry:
# - detects crossed arrows using the plain line segments
# - attaches "extension" lines to the arrow they belong to
#
# @return [Object] the result of +try_extend_split+
def refine_arrow
  detect_line_fragment

  # Headless arrows and line graphics are plain lines, i.e. possible parts
  # of a real arrow. Move them out of their maps into the segment map.
  [
    [@geometry_map, ->(shape) { shape.headless }],
    [@graphic_map, ->(shape) { shape.line? }],
  ].each do |source_map, plain_line|
    source_map.select { |_, shape| plain_line.call(shape) }.keys.each do |k|
      shape = source_map.delete(k)
      tail = Geometry::Point.new(shape.tail[:x], shape.tail[:y])
      head = Geometry::Point.new(shape.head[:x], shape.head[:y])

      @segment_map[k] = Geometry::Segment.new(tail, head)
    end
  end

  #      |
  # ---->|
  #      |
  #      V
  arrow_graphic = @graphic_map.reject { |_, g| g.head.nil? || g.tail.nil? }
  all_arrow = @geometry_map.merge(arrow_graphic)
  all_arrow.each do |key, geometry|
    arrow = Arrow.new(geometry)
    @arrow_map[key] = arrow
    line = geometry.segment.to_line

    all_arrow.except(key).each_value do |other|
      oseg = other.segment
      next unless line.intersects_with_segment?(oseg)

      point = line.intersection_points_with(oseg.to_line)
      next unless oseg.contains_point?(point)

      #     |
      #     |
      # ----|->
      #     |
      #     |
      #     v
      # NOTE: the drawing is made by hand, so the intersection point may
      # not be exactly the head of the arrow; allow a small tolerance.
      next if Geometry.distance(arrow.head, point) > ESTIMATED_DIST

      # The arrow runs into another one: continue to that arrow's head.
      arrow.change_head(other.head)
    end
  end

  #    \
  #  --\-->
  #     \
  # Same effect as the "nogo" attribute
  try_check_cross

  # -----|
  #      |
  #      V
  try_extend_tail

  #      |------>
  #      |
  # -----|
  #      |
  #      |------>
  try_extend_split
end
|
|
86
|
+
|
|
87
|
+
# - Check text within mol
|
|
88
|
+
# - Detect any "arrow" molecule (a chain of straight C bonds: ----- )
|
|
89
|
+
#   that was drawn to be read as an arrow
|
|
90
|
+
# Converts fragments that are really straight lines (+fragment.line?+) into
# segments: each one is counted in +@fragment_as_line+, stored in
# +@segment_map+ under its own key and removed from +@fragment_map+.
#
# The segment spans the two extreme nodes of the fragment: lowest/highest
# y when the line is vertical (all nodes share one x), lowest/highest x
# otherwise.
#
# @return [Array] keys of the fragments that were converted
def detect_line_fragment
  remove_keys = []

  @fragment_map.each do |key, fragment|
    # Check whether the user drew a molecule as an "extended" arrow
    next unless fragment.line?

    remove_keys.push(key)
    @fragment_as_line += 1

    nodes = fragment.node_map.values
    # A vertical line has a constant x, so its nodes can only be ordered
    # by y. Every other line is ordered by x.
    is_vertical = nodes.map(&:x).uniq.count == 1
    first_atom, last_atom = nodes.minmax_by do |atom|
      is_vertical ? atom.y : atom.x
    end

    @segment_map[key] = Geometry::Segment.new(first_atom, last_atom)
  end

  # Deleted after the loop so the hash is not modified during iteration.
  remove_keys.each { |k| @fragment_map.delete(k) }
end
|
|
110
|
+
|
|
111
|
+
# Try to extend base arrow if possible
|
|
112
|
+
# Extends arrows backwards along plain segments.
#
# When one end of a segment lies within +ESTIMATED_DIST+ of an arrow's
# tail, the segment is removed from +@segment_map+ and the arrow's tail is
# changed (+change_tail+) to the other end of that segment. If several
# segments qualify for the same arrow, the one visited last wins.
#
# @return [Hash] arrow id => { skey:, point: } for every extended arrow
def try_extend_tail
  extensions = {}

  @segment_map.each do |skey, seg|
    @arrow_map.each_value do |arrow|
      gap1 = Geometry.distance(seg.point1, arrow.tail)
      gap2 = Geometry.distance(seg.point2, arrow.tail)
      # The end closer to the tail is the junction; the opposite end
      # becomes the new tail.
      junction_gap, far_end = gap1 <= gap2 ? [gap1, seg.point2] : [gap2, seg.point1]
      next if junction_gap > ESTIMATED_DIST

      extensions[arrow.id] = { skey: skey, point: far_end }
    end
  end

  extensions.each do |arrow_id, info|
    @segment_map.delete(info[:skey])
    @arrow_map[arrow_id].change_tail(info[:point])
  end
end
|
|
138
|
+
|
|
139
|
+
# Joins a plain segment to an arrow whose tail segment it meets (a stem
# that splits into arrows).
#
# For every segment/arrow pair where the line through the segment
# intersects the arrow's +tail_segment+ and the intersection lies within
# +ESTIMATED_DIST+ of one end of the segment, the arrow is updated with
# +update_tail(intersection)+ and +change_tail(opposite end of segment)+,
# and the segment is removed from +@segment_map+. If several segments
# qualify for the same arrow, the one visited last wins.
#
# @return [Hash] arrow id => { skey:, point:, tpoint: } for each joined arrow
def try_extend_split
  arrow_new_split = {}

  @segment_map.each do |key, segment|
    line = segment.to_line

    @arrow_map.each_value do |arrow|
      asegment = arrow.tail_segment
      next unless line.intersects_with_segment?(asegment)

      point = line.intersection_points_with(asegment.to_line)
      dist1 = Geometry.distance(segment.point1, point)
      dist2 = Geometry.distance(segment.point2, point)
      next if [dist1, dist2].min > ESTIMATED_DIST

      # The end touching the arrow is the junction; the other end is the
      # new tail.
      tail_point = dist1 < dist2 ? segment.point2 : segment.point1
      arrow_new_split[arrow.id] = {
        skey: key,
        point: point,
        tpoint: tail_point,
      }
    end
  end

  arrow_new_split.each do |aid, split_info|
    arrow = @arrow_map[aid]
    arrow.update_tail(split_info[:point])
    arrow.change_tail(split_info[:tpoint])

    # The segment is now part of the arrow.
    @segment_map.delete(split_info[:skey])
  end
end
|
|
171
|
+
|
|
172
|
+
# Finds plain segments that strike through an arrow.
#
# Arrows already flagged with +cross+ are skipped. For the others, every
# segment that intersects one of the arrow's own segments is handed to
# +arrow.add_cross_segment+ and removed from +@segment_map+, provided that
# the intersection point lies on the arrow segment and
# +seg.point_in_range(point, 3.0 / 5.0)+ holds.
#
# @return [Object] whatever +@arrow_map.each_value+ returns
def try_check_cross
  @arrow_map.each_value do |arrow|
    next if arrow.cross

    crossing_keys = []

    @segment_map.each do |skey, seg|
      arrow.segments.each do |arrow_seg|
        next unless seg.intersects_with?(arrow_seg)

        hit = seg.intersection_point_with(arrow_seg)
        next unless arrow_seg.contains_point?(hit)
        next unless seg.point_in_range(hit, 3.0 / 5.0)

        # Add to the "polyline" of the arrow
        arrow.add_cross_segment(seg)
        crossing_keys.push(skey)
      end
    end

    # Removed after the scan so the hash is not modified during iteration.
    crossing_keys.each { |skey| @segment_map.delete(skey) }
  end
end
|
|
195
|
+
end
|
|
196
|
+
end
|
|
197
|
+
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ChemScanner
|
|
4
|
+
module Interpreter
|
|
5
|
+
using Extension
|
|
6
|
+
|
|
7
|
+
module PreProcess
|
|
8
|
+
# Marks fragments that are framed by a small rectangle as +boxed+.
#
# Only graphics of type 3 (rectangle) whose bounding-box area is below 100
# are considered. For each of them
# * every fragment whose polygon lies inside the rectangle is boxed, and
# * for every fragment group holding exactly one fragment, that fragment
#   is boxed when the group's title polygon lies inside the rectangle.
#
# @return [Hash] the rectangles that were considered
def find_fragment_inside_rectangle
  small_rectangles = @graphic_map.select do |_, graphic|
    # 3 = Rectangle
    graphic.type == 3 && graphic.bounding_box.area < 100
  end

  small_rectangles.each_value do |rectangle|
    @fragment_map.each_value do |fragment|
      fragment.boxed = true if rectangle.polygon.contains_polygon?(fragment.polygon)
    end

    @fragment_group_map.each_value do |fgroup|
      members = fgroup[:fragment_map].values
      next unless members.count == 1
      next unless rectangle.polygon.contains_polygon?(fgroup[:title].polygon)

      members.first.boxed = true
    end
  end
end
|
|
31
|
+
|
|
32
|
+
# Copies the graphics stored on each fragment (+fragment.graphic_map+) into
# the scheme-wide +@graphic_map+. On a key clash the fragment's entry
# replaces the existing one.
#
# @return [Object] whatever +@fragment_map.each_value+ returns
def extract_fragment_graphic
  @fragment_map.each_value do |fragment|
    @graphic_map.merge!(fragment.graphic_map) unless fragment.graphic_map.empty?
  end
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|