chem_scanner 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +13 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yml +604 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +5 -0
  8. data/CODE_OF_CONDUCT.md +74 -0
  9. data/Gemfile +20 -0
  10. data/LICENSE.txt +661 -0
  11. data/README.md +177 -0
  12. data/Rakefile +8 -0
  13. data/bin/console +14 -0
  14. data/bin/setup +8 -0
  15. data/chem_scanner.gemspec +43 -0
  16. data/lib/chem_scanner.rb +79 -0
  17. data/lib/chem_scanner/cdx.rb +67 -0
  18. data/lib/chem_scanner/cdxml.rb +72 -0
  19. data/lib/chem_scanner/chem_draw/cdx_reader.rb +101 -0
  20. data/lib/chem_scanner/chem_draw/node/base_node.rb +123 -0
  21. data/lib/chem_scanner/chem_draw/node/base_value.rb +257 -0
  22. data/lib/chem_scanner/chem_draw/node/bond.rb +100 -0
  23. data/lib/chem_scanner/chem_draw/node/bracket_attachment.rb +17 -0
  24. data/lib/chem_scanner/chem_draw/node/bracket_group.rb +32 -0
  25. data/lib/chem_scanner/chem_draw/node/chem_geometry.rb +58 -0
  26. data/lib/chem_scanner/chem_draw/node/color_table.rb +46 -0
  27. data/lib/chem_scanner/chem_draw/node/font_table.rb +54 -0
  28. data/lib/chem_scanner/chem_draw/node/fragment.rb +149 -0
  29. data/lib/chem_scanner/chem_draw/node/fragment_node.rb +145 -0
  30. data/lib/chem_scanner/chem_draw/node/graphic.rb +94 -0
  31. data/lib/chem_scanner/chem_draw/node/text.rb +242 -0
  32. data/lib/chem_scanner/chem_draw/parser.rb +214 -0
  33. data/lib/chem_scanner/chem_draw/yaml/cdx_objects.yaml +32 -0
  34. data/lib/chem_scanner/chem_draw/yaml/cdx_props.yaml +263 -0
  35. data/lib/chem_scanner/chem_draw/yaml/cdxml_objects.yaml +36 -0
  36. data/lib/chem_scanner/chem_draw/yaml/cdxml_props.yaml +263 -0
  37. data/lib/chem_scanner/chem_draw/yaml/props_data_type.yaml +263 -0
  38. data/lib/chem_scanner/configuration/abbreviation.rb +76 -0
  39. data/lib/chem_scanner/configuration/superatom.rb +76 -0
  40. data/lib/chem_scanner/configuration/superatom.txt +2874 -0
  41. data/lib/chem_scanner/configuration/util.rb +40 -0
  42. data/lib/chem_scanner/configuration/yaml/abbreviations.yaml +6399 -0
  43. data/lib/chem_scanner/configuration/yaml/elements.yaml +115 -0
  44. data/lib/chem_scanner/configuration/yaml/solvents.yaml +16 -0
  45. data/lib/chem_scanner/doc.rb +56 -0
  46. data/lib/chem_scanner/docx.rb +86 -0
  47. data/lib/chem_scanner/export/cml.rb +176 -0
  48. data/lib/chem_scanner/extension/element_map.rb +9 -0
  49. data/lib/chem_scanner/extension/geometry/bounding_box.rb +84 -0
  50. data/lib/chem_scanner/extension/geometry/line.rb +123 -0
  51. data/lib/chem_scanner/extension/geometry/point.rb +18 -0
  52. data/lib/chem_scanner/extension/geometry/polygon.rb +115 -0
  53. data/lib/chem_scanner/extension/geometry/segment.rb +196 -0
  54. data/lib/chem_scanner/extension/passthrough.rb +7 -0
  55. data/lib/chem_scanner/interpreter/element/arrow.rb +298 -0
  56. data/lib/chem_scanner/interpreter/element/atom.rb +134 -0
  57. data/lib/chem_scanner/interpreter/element/fragment.rb +59 -0
  58. data/lib/chem_scanner/interpreter/element/molecule.rb +473 -0
  59. data/lib/chem_scanner/interpreter/element/molecule_group.rb +34 -0
  60. data/lib/chem_scanner/interpreter/element/reaction.rb +186 -0
  61. data/lib/chem_scanner/interpreter/element/reaction_step.rb +39 -0
  62. data/lib/chem_scanner/interpreter/formula_to_mol.rb +75 -0
  63. data/lib/chem_scanner/interpreter/post_process/assemble.rb +38 -0
  64. data/lib/chem_scanner/interpreter/post_process/label_by_molecule.rb +37 -0
  65. data/lib/chem_scanner/interpreter/post_process/reaction_info.rb +225 -0
  66. data/lib/chem_scanner/interpreter/post_process/reaction_step.rb +95 -0
  67. data/lib/chem_scanner/interpreter/post_process/reagent_label.rb +46 -0
  68. data/lib/chem_scanner/interpreter/post_process/text_as_molecule.rb +52 -0
  69. data/lib/chem_scanner/interpreter/post_process/text_label.rb +40 -0
  70. data/lib/chem_scanner/interpreter/pre_process/arrow.rb +197 -0
  71. data/lib/chem_scanner/interpreter/pre_process/graphic.rb +41 -0
  72. data/lib/chem_scanner/interpreter/pre_process/molecule.rb +150 -0
  73. data/lib/chem_scanner/interpreter/reaction_detection/assign_to_reaction.rb +129 -0
  74. data/lib/chem_scanner/interpreter/reaction_detection/duplicate_reagents.rb +50 -0
  75. data/lib/chem_scanner/interpreter/reaction_detection/molecule_group.rb +55 -0
  76. data/lib/chem_scanner/interpreter/reaction_detection/multi_line_chain_reaction.rb +85 -0
  77. data/lib/chem_scanner/interpreter/reaction_detection/remove_separated_mol.rb +115 -0
  78. data/lib/chem_scanner/interpreter/reaction_detection/text_assignment.rb +166 -0
  79. data/lib/chem_scanner/interpreter/scheme.rb +173 -0
  80. data/lib/chem_scanner/interpreter/scheme_base.rb +64 -0
  81. data/lib/chem_scanner/interpreter/text_group/bold_groups.rb +183 -0
  82. data/lib/chem_scanner/interpreter/text_group/molecule_text_group.rb +138 -0
  83. data/lib/chem_scanner/interpreter/text_group/reaction_text_groups.rb +221 -0
  84. data/lib/chem_scanner/interpreter/text_group/retrieve_alias_info.rb +41 -0
  85. data/lib/chem_scanner/interpreter/text_group/retrieve_n_atoms.rb +106 -0
  86. data/lib/chem_scanner/interpreter/text_group/text_group_interpreter.rb +92 -0
  87. data/lib/chem_scanner/perkin_eln.rb +287 -0
  88. data/lib/chem_scanner/version.rb +5 -0
  89. data/lib/rubygems_plugin.rb +5 -0
  90. metadata +244 -0
@@ -0,0 +1,7 @@
1
# Mixin that lets an object adopt another object's instance state.
module Passthrough
  # Copies every instance variable currently set on +object+ onto the
  # receiver, overwriting receiver variables that share a name. Values are
  # assigned as-is (no duplication), so both objects reference the same
  # value objects afterwards.
  #
  # @param object [Object] source of the instance variables
  # @return [Array<Symbol>] the instance variable names that were copied
  def passthrough(object)
    names = object.instance_variables
    names.each do |name|
      value = object.instance_variable_get(name)
      instance_variable_set(name, value)
    end
  end
end
@@ -0,0 +1,298 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChemScanner
4
+ module Interpreter
5
+ using Extension
6
+
7
# Base arrow, independent from the reader that produced the geometry.
#
# An arrow is a polyline: tail -> middle_points (in order) -> head. Besides
# the path it carries the crossing flag/lines, the polygons built around its
# segments (+reagents_polygons+), a height, a line type and a text array.
class Arrow
  attr_accessor :id, :middle_points, :tail, :head, :descriptions,
                :text_arr, :cross, :cross_lines, :reagents_polygons,
                :height, :line_type

  # @param geometry [#id, #tail, #head, #cross?, #line_type, #get_tempid]
  #   source geometry; +tail+ and +head+ are read with +[:x]+ / +[:y]+
  def initialize(geometry)
    @geometry = geometry
    @id = geometry.id

    tail_xy = geometry.tail
    head_xy = geometry.head
    @tail = Geometry::Point.new(tail_xy[:x], tail_xy[:y])
    @head = Geometry::Point.new(head_xy[:x], head_xy[:y])

    @middle_points = []
    @cross = geometry.cross?
    @line_type = geometry.line_type
    @cross_lines = []
    @height = 0
    @text_arr = []
    @reagents_polygons = []
  end

  # @return [Array] every point of the path, tail first and head last
  def points
    [@tail].concat(@middle_points).push(@head)
  end

  # @return [Array] one segment per pair of consecutive path points
  def segments
    points.each_cons(2).map do |from, to|
      Geometry::Segment.new(from, to)
    end
  end

  # Segment from the point preceding the head to the head.
  def head_segment
    Geometry::Segment.new(last_bend_or_tail, @head)
  end

  # Perpendicular points of the head segment at distance +@height+.
  #
  # @return [Object, nil] nil when there is no head or the height is zero
  def head_perpen_points
    return if @head.nil? || @height.zero?

    head_segment.tail_perpen_points_dist(@height)
  end

  # Segment through the two points of #head_perpen_points. When that method
  # returns nil both end points passed to the segment are nil.
  def head_perpen_segment
    first, second = head_perpen_points
    Geometry::Segment.new(first, second)
  end

  # Segment from the point following the tail back to the tail.
  def tail_segment
    Geometry::Segment.new(first_bend_or_head, @tail)
  end

  # Perpendicular points of the tail segment at distance +@height+.
  #
  # @return [Object, nil] nil when there is no tail or the height is zero
  def tail_perpen_points
    return if @tail.nil? || @height.zero?

    Geometry::Segment.new(@tail, first_bend_or_head)
                     .head_perpen_points_dist(@height)
  end

  # Segment through the two points of #tail_perpen_points. When that method
  # returns nil both end points passed to the segment are nil.
  def tail_perpen_segment
    first, second = tail_perpen_points
    Geometry::Segment.new(first, second)
  end

  # Straight segment from tail to head, ignoring the middle points.
  def tail_head_segment
    Geometry::Segment.new(@tail, @head)
  end

  # Records +other+ as a crossing line and marks the arrow as crossed.
  def add_cross_segment(other)
    @cross_lines << other
    @cross = true
  end

  # Extends the path at the head side: the current head becomes the last
  # middle point and +new_head+ (read with +[:x]+ / +[:y]+) the new head.
  def change_head(new_head)
    @middle_points << @head
    @head = Geometry::Point.new(new_head[:x], new_head[:y])
  end

  # Extends the path at the tail side: the current tail becomes the first
  # middle point and +new_tail+ (read with +.x+ / +.y+) the new tail.
  def change_tail(new_tail)
    @middle_points.unshift(@tail)
    @tail = Geometry::Point.new(new_tail.x, new_tail.y)
  end

  # Replaces the tail without touching the middle points.
  def update_tail(new_tail)
    @tail = Geometry::Point.new(new_tail.x, new_tail.y)
  end

  # Stores +height+ and rebuilds +reagents_polygons+ with one polygon per
  # segment, made of the segment's head and tail perpendicular points.
  # Polyline path: tail -> middle1 -> middle2 -> ... -> head
  def build_polygons(height)
    @height = height
    @reagents_polygons = []

    segments.each do |seg|
      head_a, head_b = seg.head_perpen_points_dist(height)
      tail_a, tail_b = seg.tail_perpen_points_dist(height)

      @reagents_polygons << Geometry::Polygon.new([head_a, head_b, tail_b, tail_a])
    end
  end

  # Rebuilds +reagents_polygons+ from the given polygons: for every segment
  # the vertices of each polygon accepted by +segment.polygon_in_range+ are
  # projected onto the perpendiculars through the segment's end points, and
  # an axis-aligned box (padded by 0.5) around those projections and the end
  # points is stored. +@height+ becomes the largest box height, or 0.1 when
  # no box was built.
  #
  # NOTE(review): when a segment has no polygon in range the fallback calls
  # build_polygons(0.2), which resets +reagents_polygons+ and rebuilds it for
  # ALL segments, discarding boxes collected for earlier segments - confirm
  # this is intended for multi-segment arrows.
  def build_polygons_on_polygons(polygons)
    box_heights = []
    @reagents_polygons = []

    segments.each do |seg|
      start_pt = seg.point1
      end_pt = seg.point2
      start_perpen = seg.to_line.perpen_line_via_point(start_pt)
      end_perpen = seg.to_line.perpen_line_via_point(end_pt)

      candidates = []
      polygons.each do |poly|
        next unless seg.polygon_in_range(poly)

        poly.vertices.each do |vertex|
          candidates << start_perpen.point_projection(vertex)
          candidates << end_perpen.point_projection(vertex)
        end
        candidates.push(start_pt, end_pt)
      end

      if candidates.empty?
        build_polygons(0.2)
        next
      end

      xs = candidates.map(&:x)
      ys = candidates.map(&:y)
      xmax = xs.max + 0.5
      xmin = xs.min - 0.5
      ymin = ys.min - 0.5
      ymax = ys.max + 0.5

      corners = [
        Geometry::Point.new(xmin, ymin),
        Geometry::Point.new(xmin, ymax),
        Geometry::Point.new(xmax, ymax),
        Geometry::Point.new(xmax, ymin),
      ]
      box_heights << (ymax - ymin).abs

      @reagents_polygons << Geometry::Polygon.new(corners)
    end

    @height = box_heights.max || 0.1
  end

  # True when +point+ and the head get the same non-zero sign from
  # +point_side+ of the line through the head perpendicular segment.
  def product_side?(point)
    boundary = head_perpen_segment.to_line
    (boundary.point_side(@head) * boundary.point_side(point)).positive?
  end

  # True when +point+ and the tail get the same non-zero sign from
  # +point_side+ of the line through the tail perpendicular segment.
  def reactant_side?(point)
    boundary = tail_perpen_segment.to_line
    (boundary.point_side(@tail) * boundary.point_side(point)).positive?
  end

  # Projects +point+ onto the line of each segment in path order.
  #
  # @return [Object, nil] the first projection its segment reports as
  #   contained, or nil when no segment contains its projection
  def contains_point?(point)
    segments.each do |seg|
      projection = seg.to_line.point_projection(point)
      return projection if seg.contains_point?(projection)
    end

    nil
  end

  # Smallest +distance_to_boundingbox+ of any segment to the polygon's
  # bounding box.
  def min_distance_to_polygon(polygon)
    bbox = polygon.bounding_box
    segments.map { |seg| seg.distance_to_boundingbox(bbox) }.min
  end

  # Length of the segment from +point+ to the head.
  def dist_to_head(point)
    Geometry::Segment.new(point, @head).length
  end

  # Length of the segment from +point+ to the tail.
  def dist_to_tail(point)
    Geometry::Segment.new(point, @tail).length
  end

  # True when +poly.around_polygon?+ holds for at least one reagent polygon.
  def polygon_around?(poly)
    @reagents_polygons.any? { |rpoly| poly.around_polygon?(rpoly) }
  end

  # True when +segment+ intersects every reagent polygon (vacuously true
  # when there are none).
  def all_intersects_with_segment?(segment)
    @reagents_polygons.all? { |rpoly| segment.intersects_with_polygon?(rpoly) }
  end

  # True when every segment of this arrow is parallel to every segment of
  # +other+.
  def parallel_to?(other)
    segments.all? do |mine|
      other.segments.all? { |theirs| mine.parallel_to?(theirs) }
    end
  end

  # True when any bounding-box point of +poly+, or its center, satisfies
  # #point_in_middle. Every point is evaluated (no short-circuit).
  def poly_in_middle?(poly)
    probes = poly.bounding_box.points.push(poly.center)
    probes.map { |probe| point_in_middle(probe) }.any?
  end

  # Projects +target_point+ onto each path segment and asks the segment's
  # +point_in_range+ with ratio 4/5. The third argument is true for the
  # segment touching the tail, false for the segment touching the head and
  # nil for segments in between; a single-segment arrow passes true.
  #
  # @return [Boolean] true when any segment accepts the projection
  def point_in_middle(target_point)
    path = points
    last_idx = path.size - 1

    hits = path.each_cons(2).with_index(1).map do |(previous, current), idx|
      seg = Geometry::Segment.new(current, previous)
      projection = seg.to_line.point_projection(target_point)

      from_head =
        if idx == 1
          true
        elsif idx == last_idx
          false
        end
      seg.point_in_range(projection, 4.0 / 5.0, from_head)
    end

    hits.any?
  end

  # Builds a new arrow on the same geometry with a temporary id. Tail, head,
  # middle points and cross lines are copied; the reagent polygon list is a
  # shallow copy; descriptions are shared. +text_arr+ is not carried over.
  def clone
    self.class.new(@geometry).tap do |copy|
      copy.id = get_tempid

      copy.tail = @tail.clone
      copy.head = @head.clone
      copy.middle_points = Marshal.load(Marshal.dump(@middle_points))

      copy.descriptions = @descriptions
      copy.cross = @cross
      copy.cross_lines = Marshal.load(Marshal.dump(@cross_lines))
      copy.reagents_polygons = @reagents_polygons.clone
      copy.height = @height
      copy.line_type = @line_type
    end
  end

  # Temporary id obtained from the underlying geometry.
  def get_tempid
    @geometry.get_tempid
  end

  def inspect
    [
      "#<Arrow: id=#{id}, ",
      "reagents_polygon: #{reagents_polygons},",
      "tail: #{tail}, ",
      "head: #{head}, ",
      "middle_points: #{middle_points}, ",
      "cross: #{cross}, ",
      "height: #{height}, ",
      "line_type: #{line_type}, ",
      "cross_lines: #{cross_lines}, ",
      "text_arr: #{text_arr} >",
    ].join
  end

  private

  # Path point right before the head: last middle point, or the tail when
  # the arrow has no middle points.
  def last_bend_or_tail
    @middle_points.empty? ? @tail : @middle_points.last
  end

  # Path point right after the tail: first middle point, or the head when
  # the arrow has no middle points.
  def first_bend_or_head
    @middle_points.empty? ? @head : @middle_points.first
  end
end
297
+ end
298
+ end
@@ -0,0 +1,134 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChemScanner
4
+ module Interpreter
5
# Atom built from a parsed node and mirrored into an RDKit read/write
# molecule. The node id is used as the atom bookmark inside that molecule.
class Atom
  attr_accessor :is_alias, :alias_text, :charge
  attr_reader :type, :ext_type, :warning, :warning_data, :point, :is_polymer

  # Copies the relevant fields from +node+; nothing is added to +rw_mol+
  # until #process is called.
  #
  # @param node [Object] parsed node; missing x/y default to 0 and
  #   +alias_text+ is stripped
  # @param rw_mol [Object] molecule the atom will be added to
  def initialize(node, rw_mol)
    @rw_mol = rw_mol
    @node = node

    @type = node.type
    @ext_type = node.ext_type
    @atnum = node.atnum
    @num_hydrogens = node.num_hydrogens
    @charge = node.charge
    @iso = node.iso
    @x = node.x || 0
    @y = node.y || 0
    @point = node.point

    @is_alias = node.is_alias
    @alias_text = node.alias_text.strip
    @warning = node.warning
    @warning_data = node.warning_data

    @is_polymer = node.is_polymer
  end

  # Adds the atom to the molecule, bookmarks it under the node id, applies
  # hydrogens (only when non-negative), charge, isotope and 2D position, and
  # finally runs the alias handling.
  def process
    # A negative atomic number falls back to carbon.
    @atnum = 6 if @atnum.negative?
    @rw_mol.add_atom(RDKitChem::Atom.new(@atnum), false)
    rd_atom = @rw_mol.get_last_atom
    @rw_mol.set_atom_bookmark(rd_atom, @node.id)

    rd_atom.set_num_explicit_hs(@num_hydrogens) if @num_hydrogens >= 0
    rd_atom.set_formal_charge(@charge)
    rd_atom.set_isotope(@iso)
    write_position(rd_atom.get_idx)

    process_alias
  end

  # RDKit atom bookmarked under this atom's node id.
  def get_rd_atom
    @rw_mol.get_atom_with_bookmark(@node.id)
  end

  # Index of the RDKit atom inside the molecule.
  def get_idx
    get_rd_atom.get_idx
  end

  # Id of the underlying node.
  def id
    @node.id
  end

  def inspect
    [
      "#<Atom: id=#{@node.id}, ",
      "type: #{@type}, ",
      "external_type: #{@ext_type}, ",
      "atnum: #{@atnum}, ",
      "num_hydrogens: #{@num_hydrogens}, ",
      "charge: #{charge}, ",
      "iso: #{@iso}, ",
      "x: #{@x}, ",
      "y: #{@y}, ",
      "is_alias: #{is_alias}, ",
      "is_polymer: #{is_polymer}, ",
      "alias_text: #{alias_text}, ",
      "warning_data: #{@warning_data}, ",
      "warning: #{@warning} >",
    ].join
  end

  # Builds an atom from a clone of the node on the SAME molecule and
  # processes it, which adds another RDKit atom to that molecule.
  def clone
    copy = self.class.new(@node.clone, @rw_mol)
    copy.process

    copy
  end

  # Updates the stored coordinates and the atom position in conformer 0.
  def set_2d(coord_x, coord_y)
    @x = coord_x
    @y = coord_y

    write_position(get_idx)
  end

  # Updates the stored charge and the RDKit atom's formal charge.
  def set_formal_charge(charge)
    @charge = charge
    get_rd_atom.set_formal_charge(charge)
  end

  # Flags the atom as both alias and polymer.
  def set_polymer
    @is_alias = true
    @is_polymer = true
  end

  private

  # Writes (@x, @y, 0) as the position of atom +idx+ in conformer 0.
  def write_position(idx)
    @rw_mol.get_conformer(0)
           .set_atom_pos(idx, RDKitChem::Point3D.new(@x, @y, 0))
  end

  # Decides whether the atom is an alias and, if so, sets the atomic number
  # of its RDKit atom to 0.
  def process_alias
    interpreter = ChemScanner::Interpreter

    # A text listed in ALIAS_GROUP forces type 5 and clears any warning.
    if interpreter::ALIAS_GROUP.include?(@alias_text)
      @type = 5
      @is_alias = true
      @warning = false
      @warning_data = ""
    end

    @is_alias ||=
      !@alias_text.empty? && interpreter.rgroup_atom?(@alias_text) && @type >= 0

    # Polymer handling
    set_polymer if @ext_type == 3

    return unless @is_alias

    get_rd_atom.set_atomic_num(0)
  end
end
133
+ end
134
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChemScanner
4
+ module Interpreter
5
# Interpreter-level wrapper around a parsed ChemDraw fragment. Identity,
# parser, polygon, boxed flag and the node/bond/graphic maps are delegated to
# the wrapped fragment.
class Fragment
  extend Forwardable

  def_delegators :@fragment, :id, :parser, :parser_type,
                 :polygon, :polygon=, :boxed, :boxed=,
                 :node_map, :node_map=, :bond_map, :bond_map=, :graphic_map

  # @param chemdraw_fragment [Object] the fragment to wrap
  def initialize(chemdraw_fragment)
    @fragment = chemdraw_fragment
  end

  # Merges +other+ into this fragment in place: the boxed flags are OR-ed,
  # the other's nodes and bonds are merged into the maps (entries from
  # +other+ win on key clashes) and the polygon is rebuilt.
  #
  # @param other [#boxed, #node_map, #bond_map]
  def add(other)
    @fragment.boxed |= other.boxed

    @fragment.node_map.merge!(other.node_map)
    @fragment.bond_map.merge!(other.bond_map)

    @fragment.rebuild_polygon
  end

  # Wraps a clone of the underlying fragment that has been given a new id.
  # The boxed flag and the node/bond maps are assigned from the original, so
  # the map objects are shared between both wrappers.
  #
  # @return [Fragment]
  def clone
    cfrag = @fragment.clone
    cfrag.set_new_id
    cloned = self.class.new(cfrag)
    cloned.boxed = @fragment.boxed
    cloned.node_map = @fragment.node_map
    cloned.bond_map = @fragment.bond_map

    cloned
  end

  # Overwrites the id of the wrapped fragment.
  def set_id(new_id)
    @fragment.id = new_id
  end

  # Tells whether the fragment's nodes, taken in node_map order, all lie on
  # one line. Fragments with fewer than three nodes are never lines.
  #
  # Every consecutive triple of node points is checked. The previous index
  # arithmetic looked one position behind the current node, so the last node
  # was never tested and a chain bending at its final node passed as a line.
  #
  # @return [Boolean]
  def line?
    points = @fragment.node_map.values.map(&:point)
    return false if points.size < 3

    points.each_cons(3).all? do |first, middle, last|
      leading = Geometry::Segment.new(first, middle)
      trailing = Geometry::Segment.new(middle, last)
      leading.lies_on_one_line_with?(trailing)
    end
  end
end
58
+ end
59
+ end