chem_scanner 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +3 -0
- data/.rubocop.yml +604 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +20 -0
- data/LICENSE.txt +661 -0
- data/README.md +177 -0
- data/Rakefile +8 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/chem_scanner.gemspec +43 -0
- data/lib/chem_scanner.rb +79 -0
- data/lib/chem_scanner/cdx.rb +67 -0
- data/lib/chem_scanner/cdxml.rb +72 -0
- data/lib/chem_scanner/chem_draw/cdx_reader.rb +101 -0
- data/lib/chem_scanner/chem_draw/node/base_node.rb +123 -0
- data/lib/chem_scanner/chem_draw/node/base_value.rb +257 -0
- data/lib/chem_scanner/chem_draw/node/bond.rb +100 -0
- data/lib/chem_scanner/chem_draw/node/bracket_attachment.rb +17 -0
- data/lib/chem_scanner/chem_draw/node/bracket_group.rb +32 -0
- data/lib/chem_scanner/chem_draw/node/chem_geometry.rb +58 -0
- data/lib/chem_scanner/chem_draw/node/color_table.rb +46 -0
- data/lib/chem_scanner/chem_draw/node/font_table.rb +54 -0
- data/lib/chem_scanner/chem_draw/node/fragment.rb +149 -0
- data/lib/chem_scanner/chem_draw/node/fragment_node.rb +145 -0
- data/lib/chem_scanner/chem_draw/node/graphic.rb +94 -0
- data/lib/chem_scanner/chem_draw/node/text.rb +242 -0
- data/lib/chem_scanner/chem_draw/parser.rb +214 -0
- data/lib/chem_scanner/chem_draw/yaml/cdx_objects.yaml +32 -0
- data/lib/chem_scanner/chem_draw/yaml/cdx_props.yaml +263 -0
- data/lib/chem_scanner/chem_draw/yaml/cdxml_objects.yaml +36 -0
- data/lib/chem_scanner/chem_draw/yaml/cdxml_props.yaml +263 -0
- data/lib/chem_scanner/chem_draw/yaml/props_data_type.yaml +263 -0
- data/lib/chem_scanner/configuration/abbreviation.rb +76 -0
- data/lib/chem_scanner/configuration/superatom.rb +76 -0
- data/lib/chem_scanner/configuration/superatom.txt +2874 -0
- data/lib/chem_scanner/configuration/util.rb +40 -0
- data/lib/chem_scanner/configuration/yaml/abbreviations.yaml +6399 -0
- data/lib/chem_scanner/configuration/yaml/elements.yaml +115 -0
- data/lib/chem_scanner/configuration/yaml/solvents.yaml +16 -0
- data/lib/chem_scanner/doc.rb +56 -0
- data/lib/chem_scanner/docx.rb +86 -0
- data/lib/chem_scanner/export/cml.rb +176 -0
- data/lib/chem_scanner/extension/element_map.rb +9 -0
- data/lib/chem_scanner/extension/geometry/bounding_box.rb +84 -0
- data/lib/chem_scanner/extension/geometry/line.rb +123 -0
- data/lib/chem_scanner/extension/geometry/point.rb +18 -0
- data/lib/chem_scanner/extension/geometry/polygon.rb +115 -0
- data/lib/chem_scanner/extension/geometry/segment.rb +196 -0
- data/lib/chem_scanner/extension/passthrough.rb +7 -0
- data/lib/chem_scanner/interpreter/element/arrow.rb +298 -0
- data/lib/chem_scanner/interpreter/element/atom.rb +134 -0
- data/lib/chem_scanner/interpreter/element/fragment.rb +59 -0
- data/lib/chem_scanner/interpreter/element/molecule.rb +473 -0
- data/lib/chem_scanner/interpreter/element/molecule_group.rb +34 -0
- data/lib/chem_scanner/interpreter/element/reaction.rb +186 -0
- data/lib/chem_scanner/interpreter/element/reaction_step.rb +39 -0
- data/lib/chem_scanner/interpreter/formula_to_mol.rb +75 -0
- data/lib/chem_scanner/interpreter/post_process/assemble.rb +38 -0
- data/lib/chem_scanner/interpreter/post_process/label_by_molecule.rb +37 -0
- data/lib/chem_scanner/interpreter/post_process/reaction_info.rb +225 -0
- data/lib/chem_scanner/interpreter/post_process/reaction_step.rb +95 -0
- data/lib/chem_scanner/interpreter/post_process/reagent_label.rb +46 -0
- data/lib/chem_scanner/interpreter/post_process/text_as_molecule.rb +52 -0
- data/lib/chem_scanner/interpreter/post_process/text_label.rb +40 -0
- data/lib/chem_scanner/interpreter/pre_process/arrow.rb +197 -0
- data/lib/chem_scanner/interpreter/pre_process/graphic.rb +41 -0
- data/lib/chem_scanner/interpreter/pre_process/molecule.rb +150 -0
- data/lib/chem_scanner/interpreter/reaction_detection/assign_to_reaction.rb +129 -0
- data/lib/chem_scanner/interpreter/reaction_detection/duplicate_reagents.rb +50 -0
- data/lib/chem_scanner/interpreter/reaction_detection/molecule_group.rb +55 -0
- data/lib/chem_scanner/interpreter/reaction_detection/multi_line_chain_reaction.rb +85 -0
- data/lib/chem_scanner/interpreter/reaction_detection/remove_separated_mol.rb +115 -0
- data/lib/chem_scanner/interpreter/reaction_detection/text_assignment.rb +166 -0
- data/lib/chem_scanner/interpreter/scheme.rb +173 -0
- data/lib/chem_scanner/interpreter/scheme_base.rb +64 -0
- data/lib/chem_scanner/interpreter/text_group/bold_groups.rb +183 -0
- data/lib/chem_scanner/interpreter/text_group/molecule_text_group.rb +138 -0
- data/lib/chem_scanner/interpreter/text_group/reaction_text_groups.rb +221 -0
- data/lib/chem_scanner/interpreter/text_group/retrieve_alias_info.rb +41 -0
- data/lib/chem_scanner/interpreter/text_group/retrieve_n_atoms.rb +106 -0
- data/lib/chem_scanner/interpreter/text_group/text_group_interpreter.rb +92 -0
- data/lib/chem_scanner/perkin_eln.rb +287 -0
- data/lib/chem_scanner/version.rb +5 -0
- data/lib/rubygems_plugin.rb +5 -0
- metadata +244 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ChemScanner
|
|
4
|
+
module Interpreter
|
|
5
|
+
using Extension
|
|
6
|
+
|
|
7
|
+
module ReactionDetection
|
|
8
|
+
# (1): A ---> C
|
|
9
|
+
#
|
|
10
|
+
# (2): B ---> D
|
|
11
|
+
# |
|
|
12
|
+
# |
|
|
13
|
+
# V
|
|
14
|
+
# E
|
|
15
|
+
# Remove C from (2)
|
|
16
|
+
# Prune molecules that were attached to a reaction although they sit far
# away from the rest of their reactant/product group.
#
# For every reaction, each of its +reactant_ids+ / +product_ids+ lists that
# holds at least two ids is examined. A molecule is dropped when
#   * its distance (as computed by distance_molecule_group) exceeds
#     +dist_gap+ times the smallest distance found in the group, and
#   * it is also listed in another reaction whose arrow is not parallel to
#     the arrow of the current reaction.
# Group members whose polygon center lies closer than that same threshold to
# a dropped molecule are dropped together with it.
#
# The id lists of the reactions are modified in place.
def remove_separated_mol
  dist_gap = 2.0

  @reactions.each do |r|
    arrow = @arrow_map[r.arrow_id]

    %w[reactant_ids product_ids].each do |group|
      rgroup = r.send(group)
      next if rgroup.count < 2

      # Distance map of 1 molecule to arrow and other molecules within group
      dist_map = distance_molecule_group(rgroup, arrow, group)
      # Ids unknown to @mol_map are not part of the map, so it may be empty;
      # without a minimum distance there is nothing to compare against.
      next if dist_map.empty?

      threshold = dist_gap * dist_map.values.min

      separated = dist_map.keys.select do |k|
        next false unless dist_map[k] > threshold

        @reactions.any? do |other|
          other.arrow_id != r.arrow_id &&
            other.molecule_ids.include?(k) &&
            !arrow.parallel_to?(@arrow_map[other.arrow_id])
        end
      end

      # Start from the separated molecules and add their close neighbours.
      remove_keys = separated.dup
      separated.each do |k|
        mol = @mol_map[k]
        next if mol.nil?

        (rgroup - [k]).each do |id|
          om = @mol_map[id]
          next if om.nil?

          d = Geometry.distance(mol.polygon.center, om.polygon.center)
          remove_keys.push(id) if d < threshold
        end
      end

      rgroup.delete_if { |x| remove_keys.include?(x) }
    end
  end
end
|
|
68
|
+
|
|
69
|
+
# Build a map { molecule id => distance } for one side of a reaction.
#
# The measuring line is the line through the arrow's tail segment when
# +group+ is "reactant_ids", otherwise the line through its head segment;
# the reference point is the arrow tail or head respectively.
#
# For each id of +rgroup+ that is known to @mol_map the stored value is the
# smaller of
#   * the shortest distance between the reference point and a point where
#     the molecule polygon intersects the measuring line, and
#   * the shortest distance between such an intersection point and an
#     intersection point of any other known molecule of the group.
# 9_999_999 stands for "no intersection found".
#
# @param rgroup [Array] molecule ids of the group
# @param arrow the arrow of the reaction
# @param group [String] "reactant_ids" or any other value for products
# @return [Hash] molecule id => distance
def distance_molecule_group(rgroup, arrow, group)
  if group == "reactant_ids"
    apoint = arrow.tail
    aline = arrow.tail_segment.to_line
  else
    apoint = arrow.head
    aline = arrow.head_segment.to_line
  end

  # Intersection points are needed for every pair of molecules; compute them
  # lazily and only once per molecule instead of once per pair.
  hits = Hash.new do |cache, id|
    cache[id] = @mol_map[id].polygon.intersection_points_with_line(aline)
  end

  dist_map = {}
  rgroup.each do |id|
    next unless @mol_map.key?(id)

    own_points = hits[id]

    # Distance to arrow
    da = 9_999_999
    own_points.each do |point|
      length = Geometry.distance(apoint, point)
      da = length if length < da
    end

    # Distance to other molecule within group
    dmols = 9_999_999
    (rgroup - [id]).each do |mid|
      next if @mol_map[mid].nil?

      hits[mid].each do |other_point|
        own_points.each do |own_point|
          length = Geometry.distance(own_point, other_point)
          dmols = length if length < dmols
        end
      end
    end

    dist_map[id] = [da, dmols].min
  end

  dist_map
end
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
end
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ChemScanner
|
|
4
|
+
module Interpreter
|
|
5
|
+
using Extension
|
|
6
|
+
|
|
7
|
+
module ReactionDetection
|
|
8
|
+
# Attach/bind text to molecule or arrow
|
|
9
|
+
# Attach every text of @text_map to the nearest molecule or to the nearest
# arrow.
#
# Per text:
#   * no arrow found: the text goes to the nearest molecule, unless
#     try_detect_label_position returned a position, in which case it is
#     remembered as a possible reactant/product label and handled below;
#   * an arrow but no molecule found: the text goes to the arrow;
#   * both found: the text goes to the arrow when it is close enough
#     compared to the molecule and text_around_arrow? agrees, otherwise to
#     the molecule (texts whose id is a key of @mol_group_map are not added
#     to a molecule).
#
# For a remembered label, the molecule carrying that label is added to the
# detected side of the detected reaction; when no molecule has that label
# the text is attached to the nearest molecule instead.
# Finally assemble_molecule_text is run for every molecule.
def assign_text
  tgroup_ids = @mol_group_map.keys
  text_as_mol_ids = []

  @text_map.each do |k, text|
    group = try_detect_label_position(text)
    center = text.polygon.center

    min_mol = nearest_molecule(center)
    min_arrow = nearest_arrow(text)
    arrow = @arrow_map[min_arrow.key]

    if arrow.nil?
      mol_key = min_mol.key

      if group.nil?
        # mol_key is the placeholder 0 when no molecule was found either;
        # such a text has nothing to be attached to.
        nearest = @mol_map[mol_key]
        nearest.text_ids.push(k) unless nearest.nil?
      else
        text_as_mol_ids.push(id: k, mol: mol_key, group: group)
      end

      next
    end

    if min_mol.key.zero?
      # text_arr holds text ids, so store the id of the text (not the id of
      # the arrow itself).
      arrow.text_arr.push(k)
      next
    end

    to_arrow = (
      min_arrow.value < min_mol.value * 2.5 &&
      text_around_arrow?(arrow, text, min_arrow.value)
    )

    if to_arrow
      arrow.text_arr.push(k)
      next
    end

    # Do not add a molecule-group text to molecule as description
    @mol_map[min_mol.key].text_ids.push(k) unless tgroup_ids.include?(k)
  end

  text_as_mol_ids.each do |tinfo|
    tid = tinfo[:id]
    text = @text_map[tid]
    mid = tinfo[:mol]
    mol = @mol_map.values.detect { |m| m.label == text.bold_text }

    if mol.nil?
      # Fall back to the nearest molecule, if there is one.
      nearest = @mol_map[mid]
      nearest.text_ids.push(tid) unless nearest.nil?
    else
      rid = tinfo[:group].keys.first
      group = tinfo[:group][rid]
      reaction = @reactions.detect { |r| r.arrow_id == rid }
      # "reactants" / "products" -> "reactant_ids" / "product_ids"
      rgroup = reaction.send("#{group[0..-2]}_ids")
      rgroup.push(mol.id).uniq!
    end
  end

  @mol_map.each_value { |mol| assemble_molecule_text(mol) }
end
|
|
71
|
+
|
|
72
|
+
# Decide whether a fully bold text stands for a reactant or a product of
# exactly one reaction.
#
# @param text a text whose +value+, +bold_text+ and +polygon+ are read
# @return [Hash, nil] { arrow id => "reactants" | "products" } holding a
#   single entry, or nil when the text is not entirely bold, when zero or
#   several reactions yield a position, or when the only position is
#   neither "reactants" nor "products"
def try_detect_label_position(text)
  return nil unless text.value == text.bold_text

  positions = @reactions.each_with_object({}) do |reaction, found|
    arrow_id = reaction.arrow_id
    position = detect_position(@arrow_map[arrow_id], text.polygon)
    found[arrow_id] = position unless position.nil?
  end

  return nil if positions.size != 1
  return nil unless %w[reactants products].include?(positions.values.first)

  positions
end
|
|
92
|
+
|
|
93
|
+
# Find the molecule of @mol_map that is closest to +point+.
#
# @param point the point handed to each molecule's min_distance_to_point
# @return [OpenStruct] +key+ is the id of the closest molecule and +value+
#   its distance; key 0 / value 9_999_999 when no molecule is closer than
#   9_999_999 (in particular when there are no molecules). On equal
#   distances the molecule visited first wins.
def nearest_molecule(point)
  closest = OpenStruct.new(key: 0, value: 9_999_999)

  @mol_map.each do |mol_id, molecule|
    distance = molecule.min_distance_to_point(point)
    next unless distance < closest.value

    closest.key = mol_id
    closest.value = distance
  end

  closest
end
|
|
107
|
+
|
|
108
|
+
# Find the arrow of @arrow_map that is closest to +text+.
#
# Only arrow segments that contain the projection of the text polygon's
# center onto the segment's line are considered; for those, the distance is
# the segment's distance_to_boundingbox of the text polygon.
#
# @param text a text whose +polygon+ is read
# @return [OpenStruct] +key+ is the id of the closest arrow and +value+ its
#   distance; key 0 / value 9_999_999 when no segment qualified
def nearest_arrow(text)
  text_poly = text.polygon
  best = OpenStruct.new(key: 0, value: 9_999_999)

  @arrow_map.each do |arrow_id, arrow|
    arrow.segments.each do |seg|
      foot = seg.to_line.point_projection(text_poly.center)
      next unless seg.contains_point?(foot)

      gap = seg.distance_to_boundingbox(text_poly)
      next unless gap < best.value

      best.key = arrow_id
      best.value = gap
    end
  end

  best
end
|
|
129
|
+
|
|
130
|
+
# Tell whether +text+ sits next to +arrow+ with nothing but reagents in
# between.
#
# The answer is true when the arrow reports the text polygon as being in
# its middle and, for at least one arrow segment, the perpendicular segment
# through the text center touches that arrow segment and crosses no
# molecule other than the reagents of the arrow's reaction.
#
# @param arrow the candidate arrow
# @param text the text; its +polygon+ is read
# @param dist [Numeric] distance between text and arrow
# @return [Boolean]
def text_around_arrow?(arrow, text, dist)
  tpoly = text.polygon
  return false unless arrow.poly_in_middle?(tpoly)

  pheight = [tpoly.width, tpoly.height].max
  # NOTE(review): the polygons are rebuilt twice in a row and nothing reads
  # them in between; kept exactly as found - confirm the intended order of
  # these calls against Arrow#build_polygons.
  arrow.build_polygons(pheight + dist)
  cur_height = arrow.height
  arrow.build_polygons(cur_height)

  tcenter = tpoly.center
  reaction = @reactions.detect { |r| r.arrow_id == arrow.id }
  # An arrow that is not (yet) part of a reaction has no reagents to ignore.
  reagent_ids = reaction.nil? ? [] : reaction.reagent_ids

  arrow.segments.any? do |aseg|
    pseg = aseg.perpen_segment_via_point(tcenter)
    touches = aseg.contains_point?(pseg.point1) ||
              aseg.contains_point?(pseg.point2)
    next false unless touches

    (molecules_intersects_with_segment(pseg) - reagent_ids).empty?
  end
end
|
|
155
|
+
|
|
156
|
+
# Collect the ids of all molecules in @mol_map whose polygon is intersected
# by +segment+.
#
# @param segment an object responding to intersects_with_polygon?
# @return [Array] the matching molecule ids, in @mol_map order
def molecules_intersects_with_segment(segment)
  @mol_map.keys.select do |mol_id|
    segment.intersects_with_polygon?(@mol_map[mol_id].polygon)
  end
end
|
|
164
|
+
end
|
|
165
|
+
end
|
|
166
|
+
end
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
module ChemScanner
|
|
5
|
+
module Interpreter
|
|
6
|
+
Gem.find_files("chem_scanner/interpreter/*/*.rb").each { |f| require f }
|
|
7
|
+
|
|
8
|
+
using Extension
|
|
9
|
+
|
|
10
|
+
# General scheme, contains all graphics (molecules, text, arrows ...)
|
|
11
|
+
class Scheme
|
|
12
|
+
attr_reader :mol_map, :text_map, :bracket_map, :reactions,
|
|
13
|
+
:n_atoms, :fragment_as_line
|
|
14
|
+
|
|
15
|
+
include PreProcess
|
|
16
|
+
include ReactionDetection
|
|
17
|
+
include PostProcess
|
|
18
|
+
|
|
19
|
+
# Take over the maps produced by +parser+ and set up the empty containers
# that the interpretation steps fill later.
#
# @param parser an object exposing fragment_map, fragment_group_map,
#   geometry_map, graphic_map, text_map and bracket_map
def initialize(parser)
  # Every parsed fragment is wrapped into an interpreter Fragment.
  @fragment_map = parser.fragment_map.each_with_object({}) do |(id, node), wrapped|
    wrapped[id] = Fragment.new(node)
  end
  @fragment_group_map = parser.fragment_group_map

  @geometry_map = parser.geometry_map
  @graphic_map = parser.graphic_map

  @text_map = parser.text_map
  @bracket_map = parser.bracket_map

  # Interpreted elements
  @mol_map = ElementMap.new
  @mol_group_map = ElementMap.new
  @arrow_map = ElementMap.new
  # Segment or headless arrow
  @segment_map = ElementMap.new

  @mol_substitutes = {}
  @reaction_substitutes = {}

  @fragment_as_line = 0

  @reactions = []
end
|
|
44
|
+
|
|
45
|
+
# Run the whole interpretation: pre-processing, reaction detection and
# post-processing, followed by the text-group interpretation. Molecules of
# the molecule groups that are not yet part of @mol_map are registered at
# the end.
def interpret
  pre_process
  reaction_detection
  post_process

  group_interpreter = TextGroupInterpreter.new(self)

  # Detect whether molecules carry any n-atom and keep that information
  group_interpreter.retrieve_n_atoms_info
  @n_atoms = group_interpreter.n_atoms

  # Retrieve R-groups and alias-groups of molecules
  group_interpreter.retrieve_alias_info

  # Currently disabled:
  # - Find R-groups ("R1", "R2", "R", ...)
  # - Find alias-groups ("X", "Y", "Ar", "M")
  # - Detect label set ("2a,b" "3-6" ...)
  # tgi.retrieve_labels_and_groups

  # Currently disabled:
  # - Combine corresponding additional info of the detected
  #   molecule/reaction text, e.g., "3: m = 1, R = H"
  # - Interpret previously retrieved data
  # - Save that info to generate molecules/reactions later
  # interpret_labels_and_groups

  # Try to generate new molecules/reactions
  # based on R-groups, alias-groups, n-atoms ...
  group_interpreter.generate_elements

  @mol_group_map.each do |_group_id, mgroup|
    mgroup.molecules.each do |mol|
      next if @mol_map.key?(mol.id)

      @mol_map[mol.id] = mol
    end
  end
end
|
|
80
|
+
|
|
81
|
+
# All molecules known to the scheme.
#
# @return the values of the molecule map (read through the +mol_map+ reader)
def molecules
  mol_map.values
end
|
|
84
|
+
|
|
85
|
+
private
|
|
86
|
+
|
|
87
|
+
# Prepare the raw parsed elements before reactions are detected.
# The steps run strictly in the listed order; the value of the last step
# is returned.
def pre_process
  steps = [
    # Retrieve fragments which are covered by a rectangle
    :find_fragment_inside_rectangle,

    # - Attach the fragments detected above to an arrow
    # - Try to detect cross arrows ( --//--> or --X--> )
    # - Extend arrows if possible, e.g. a bent arrow
    #
    #     -----|
    #          |
    #          V
    #
    # - Split extended arrows if possible, e.g. a forked arrow
    #
    #          |------>
    #          |
    #     -----|
    #          |
    #          |------>
    :refine_arrow,

    :extract_fragment_graphic,
    :refine_molecules
  ]

  steps.inject(nil) { |_previous, step| send(step) }
end
|
|
110
|
+
|
|
111
|
+
# Detect reactions and bind molecules and texts to them.
# The steps run strictly in the listed order; the value of the last step
# is returned.
def reaction_detection
  steps = [
    # Add molecules to reactions based on molecule and arrow positions
    :assign_to_reaction,

    # (1): A ---> C
    #
    # (2): B ---> D
    #      |
    #      |
    #      V
    #      E
    # Remove C from (2)
    #
    # A molecule that is separated from the others of the same group and
    # lies too far away is not considered part of the reaction.
    :remove_separated_mol,

    # With the current algorithm a reagent may belong to several reactions.
    # Only keep it in the nearest one.
    :refine_duplicate_reagents,

    # Attach text to molecule or arrow, process molecule labels
    :assign_text,

    # Text can also be a reactant/product. Processed ONLY IF the text does
    # not belong to any reaction or molecule.
    :assign_molecule_group,

    # NOTE: Specific scenarios are handled from here on
    #
    # A -> B ->
    # C -> D -> E
    # In this case there is an extra implicit reaction: B -> C
    # For now this is only handled when all arrows are horizontal.
    :multi_line_chain_reaction
  ]

  steps.inject(nil) { |_previous, step| send(step) }
end
|
|
148
|
+
|
|
149
|
+
# Refine the detected reactions: resolve labels, turn ids into molecules
# and extract the reaction details. Returns the value of
# process_reactions_step.
def post_process
  label_steps = [
    # Check whether the reagents contain a label
    # that is not assigned to any molecule
    :refine_reagents_label,

    # A label usually stands for a molecule; process those in reagent text
    :replace_label_by_molecule,

    # Text-as-label, e.g. "ligand = ", "amide = "
    :refine_text_label,

    :refine_text_as_molecule,

    # From id => molecule
    :assemble_reaction
  ]
  label_steps.each { |step| send(step) }

  # - Extract reaction-related information: temperature, time, yield
  # - Try to interpret abbreviations
  @reactions.each do |reaction|
    process_reaction_info(reaction)
  end

  process_reactions_step
end
|
|
171
|
+
end
|
|
172
|
+
end
|
|
173
|
+
end
|