chem_scanner 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +13 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yml +604 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +5 -0
  8. data/CODE_OF_CONDUCT.md +74 -0
  9. data/Gemfile +20 -0
  10. data/LICENSE.txt +661 -0
  11. data/README.md +177 -0
  12. data/Rakefile +8 -0
  13. data/bin/console +14 -0
  14. data/bin/setup +8 -0
  15. data/chem_scanner.gemspec +43 -0
  16. data/lib/chem_scanner.rb +79 -0
  17. data/lib/chem_scanner/cdx.rb +67 -0
  18. data/lib/chem_scanner/cdxml.rb +72 -0
  19. data/lib/chem_scanner/chem_draw/cdx_reader.rb +101 -0
  20. data/lib/chem_scanner/chem_draw/node/base_node.rb +123 -0
  21. data/lib/chem_scanner/chem_draw/node/base_value.rb +257 -0
  22. data/lib/chem_scanner/chem_draw/node/bond.rb +100 -0
  23. data/lib/chem_scanner/chem_draw/node/bracket_attachment.rb +17 -0
  24. data/lib/chem_scanner/chem_draw/node/bracket_group.rb +32 -0
  25. data/lib/chem_scanner/chem_draw/node/chem_geometry.rb +58 -0
  26. data/lib/chem_scanner/chem_draw/node/color_table.rb +46 -0
  27. data/lib/chem_scanner/chem_draw/node/font_table.rb +54 -0
  28. data/lib/chem_scanner/chem_draw/node/fragment.rb +149 -0
  29. data/lib/chem_scanner/chem_draw/node/fragment_node.rb +145 -0
  30. data/lib/chem_scanner/chem_draw/node/graphic.rb +94 -0
  31. data/lib/chem_scanner/chem_draw/node/text.rb +242 -0
  32. data/lib/chem_scanner/chem_draw/parser.rb +214 -0
  33. data/lib/chem_scanner/chem_draw/yaml/cdx_objects.yaml +32 -0
  34. data/lib/chem_scanner/chem_draw/yaml/cdx_props.yaml +263 -0
  35. data/lib/chem_scanner/chem_draw/yaml/cdxml_objects.yaml +36 -0
  36. data/lib/chem_scanner/chem_draw/yaml/cdxml_props.yaml +263 -0
  37. data/lib/chem_scanner/chem_draw/yaml/props_data_type.yaml +263 -0
  38. data/lib/chem_scanner/configuration/abbreviation.rb +76 -0
  39. data/lib/chem_scanner/configuration/superatom.rb +76 -0
  40. data/lib/chem_scanner/configuration/superatom.txt +2874 -0
  41. data/lib/chem_scanner/configuration/util.rb +40 -0
  42. data/lib/chem_scanner/configuration/yaml/abbreviations.yaml +6399 -0
  43. data/lib/chem_scanner/configuration/yaml/elements.yaml +115 -0
  44. data/lib/chem_scanner/configuration/yaml/solvents.yaml +16 -0
  45. data/lib/chem_scanner/doc.rb +56 -0
  46. data/lib/chem_scanner/docx.rb +86 -0
  47. data/lib/chem_scanner/export/cml.rb +176 -0
  48. data/lib/chem_scanner/extension/element_map.rb +9 -0
  49. data/lib/chem_scanner/extension/geometry/bounding_box.rb +84 -0
  50. data/lib/chem_scanner/extension/geometry/line.rb +123 -0
  51. data/lib/chem_scanner/extension/geometry/point.rb +18 -0
  52. data/lib/chem_scanner/extension/geometry/polygon.rb +115 -0
  53. data/lib/chem_scanner/extension/geometry/segment.rb +196 -0
  54. data/lib/chem_scanner/extension/passthrough.rb +7 -0
  55. data/lib/chem_scanner/interpreter/element/arrow.rb +298 -0
  56. data/lib/chem_scanner/interpreter/element/atom.rb +134 -0
  57. data/lib/chem_scanner/interpreter/element/fragment.rb +59 -0
  58. data/lib/chem_scanner/interpreter/element/molecule.rb +473 -0
  59. data/lib/chem_scanner/interpreter/element/molecule_group.rb +34 -0
  60. data/lib/chem_scanner/interpreter/element/reaction.rb +186 -0
  61. data/lib/chem_scanner/interpreter/element/reaction_step.rb +39 -0
  62. data/lib/chem_scanner/interpreter/formula_to_mol.rb +75 -0
  63. data/lib/chem_scanner/interpreter/post_process/assemble.rb +38 -0
  64. data/lib/chem_scanner/interpreter/post_process/label_by_molecule.rb +37 -0
  65. data/lib/chem_scanner/interpreter/post_process/reaction_info.rb +225 -0
  66. data/lib/chem_scanner/interpreter/post_process/reaction_step.rb +95 -0
  67. data/lib/chem_scanner/interpreter/post_process/reagent_label.rb +46 -0
  68. data/lib/chem_scanner/interpreter/post_process/text_as_molecule.rb +52 -0
  69. data/lib/chem_scanner/interpreter/post_process/text_label.rb +40 -0
  70. data/lib/chem_scanner/interpreter/pre_process/arrow.rb +197 -0
  71. data/lib/chem_scanner/interpreter/pre_process/graphic.rb +41 -0
  72. data/lib/chem_scanner/interpreter/pre_process/molecule.rb +150 -0
  73. data/lib/chem_scanner/interpreter/reaction_detection/assign_to_reaction.rb +129 -0
  74. data/lib/chem_scanner/interpreter/reaction_detection/duplicate_reagents.rb +50 -0
  75. data/lib/chem_scanner/interpreter/reaction_detection/molecule_group.rb +55 -0
  76. data/lib/chem_scanner/interpreter/reaction_detection/multi_line_chain_reaction.rb +85 -0
  77. data/lib/chem_scanner/interpreter/reaction_detection/remove_separated_mol.rb +115 -0
  78. data/lib/chem_scanner/interpreter/reaction_detection/text_assignment.rb +166 -0
  79. data/lib/chem_scanner/interpreter/scheme.rb +173 -0
  80. data/lib/chem_scanner/interpreter/scheme_base.rb +64 -0
  81. data/lib/chem_scanner/interpreter/text_group/bold_groups.rb +183 -0
  82. data/lib/chem_scanner/interpreter/text_group/molecule_text_group.rb +138 -0
  83. data/lib/chem_scanner/interpreter/text_group/reaction_text_groups.rb +221 -0
  84. data/lib/chem_scanner/interpreter/text_group/retrieve_alias_info.rb +41 -0
  85. data/lib/chem_scanner/interpreter/text_group/retrieve_n_atoms.rb +106 -0
  86. data/lib/chem_scanner/interpreter/text_group/text_group_interpreter.rb +92 -0
  87. data/lib/chem_scanner/perkin_eln.rb +287 -0
  88. data/lib/chem_scanner/version.rb +5 -0
  89. data/lib/rubygems_plugin.rb +5 -0
  90. metadata +244 -0
@@ -0,0 +1,7 @@
1
# Mixin that lets the receiver take over the state of another object.
module Passthrough
  # Assigns every instance variable found on +object+ to the receiver under
  # the same name. Values are assigned by reference (no duplication).
  #
  # @param object [Object] the object whose instance variables are read
  # @return [Array<Symbol>] the instance variable names that were transferred
  def passthrough(object)
    ivar_names = object.instance_variables
    ivar_names.each do |ivar_name|
      value = object.instance_variable_get(ivar_name)
      instance_variable_set(ivar_name, value)
    end
  end
end
@@ -0,0 +1,298 @@
1
+ # frozen_string_literal: true
2
+
3
module ChemScanner
  module Interpreter
    using Extension

    # Base Arrow, independent from reader.
    #
    # The arrow is stored as a polyline:
    #   tail -> middle_points[0] -> middle_points[1] -> ... -> head
    # and carries one "reagents polygon" per polyline segment.
    class Arrow
      # Margin added on each side of the rectangle computed by
      # #build_polygons_on_polygons.
      POLYGON_MARGIN = 0.5
      # Height of the band built for a segment that has no polygon in range.
      FALLBACK_BAND_HEIGHT = 0.2
      # Value stored in @height when no segment produced a rectangle.
      FALLBACK_HEIGHT = 0.1
      # Fraction handed to Segment#point_in_range by #point_in_middle.
      MIDDLE_RATIO = 4.0 / 5.0
      private_constant :POLYGON_MARGIN, :FALLBACK_BAND_HEIGHT,
                       :FALLBACK_HEIGHT, :MIDDLE_RATIO

      attr_accessor :id, :middle_points, :tail, :head, :descriptions,
                    :text_arr, :cross, :cross_lines, :reagents_polygons,
                    :height, :line_type

      # @param geometry [#id, #tail, #head, #cross?, #line_type, #get_tempid]
      #   reader-side arrow object; +tail+ and +head+ are read with
      #   +[:x]+ / +[:y]+.
      def initialize(geometry)
        @geometry = geometry
        @id = geometry.id
        @tail = Geometry::Point.new(geometry.tail[:x], geometry.tail[:y])
        @head = Geometry::Point.new(geometry.head[:x], geometry.head[:y])

        @middle_points = []
        @cross = geometry.cross?
        @line_type = geometry.line_type
        @cross_lines = []
        @height = 0
        @text_arr = []
        @reagents_polygons = []
      end

      # @return [Array] all polyline points, ordered from tail to head
      def points
        [@tail, *@middle_points, @head]
      end

      # @return [Array<Geometry::Segment>] one segment per pair of consecutive
      #   points, ordered from tail to head
      def segments
        points.each_cons(2).map do |from, to|
          Geometry::Segment.new(from, to)
        end
      end

      # @return [Geometry::Segment] segment from the point preceding the head
      #   to the head
      def head_segment
        Geometry::Segment.new(point_before_head, @head)
      end

      # Delegates to Segment#tail_perpen_points_dist on the last segment,
      # using the stored height.
      #
      # @return [Array, nil] nil when there is no head or the height is zero
      def head_perpen_points
        return nil if @head.nil? || @height.zero?

        head_segment.tail_perpen_points_dist(@height)
      end

      # Segment through the two points returned by #head_perpen_points.
      # When that method returns nil, both end points passed to the segment
      # are nil.
      def head_perpen_segment
        first, second = head_perpen_points
        Geometry::Segment.new(first, second)
      end

      # @return [Geometry::Segment] segment from the point following the tail
      #   to the tail
      def tail_segment
        Geometry::Segment.new(point_after_tail, @tail)
      end

      # Delegates to Segment#head_perpen_points_dist on the first segment
      # (built tail first), using the stored height.
      #
      # @return [Array, nil] nil when there is no tail or the height is zero
      def tail_perpen_points
        return nil if @tail.nil? || @height.zero?

        Geometry::Segment.new(@tail, point_after_tail)
                         .head_perpen_points_dist(@height)
      end

      # Segment through the two points returned by #tail_perpen_points.
      # When that method returns nil, both end points passed to the segment
      # are nil.
      def tail_perpen_segment
        first, second = tail_perpen_points
        Geometry::Segment.new(first, second)
      end

      # @return [Geometry::Segment] straight segment from tail to head,
      #   ignoring the middle points
      def tail_head_segment
        Geometry::Segment.new(@tail, @head)
      end

      # Records +other+ as a crossing line and flags the arrow as crossed.
      def add_cross_segment(other)
        @cross_lines.push(other)
        @cross = true
      end

      # Extends the polyline at the head side: the current head becomes the
      # last middle point.
      #
      # @param new_head [#[]] read with +[:x]+ and +[:y]+ (unlike
      #   #change_tail, which reads +.x+ / +.y+)
      def change_head(new_head)
        @middle_points.push(@head)
        @head = Geometry::Point.new(new_head[:x], new_head[:y])
      end

      # Extends the polyline at the tail side: the current tail becomes the
      # first middle point.
      #
      # @param new_tail [#x, #y]
      def change_tail(new_tail)
        @middle_points.unshift(@tail)
        @tail = Geometry::Point.new(new_tail.x, new_tail.y)
      end

      # Replaces the tail without touching the middle points.
      #
      # @param new_tail [#x, #y]
      def update_tail(new_tail)
        @tail = Geometry::Point.new(new_tail.x, new_tail.y)
      end

      # Stores +height+ and rebuilds one reagents polygon per segment from the
      # segment's perpendicular points at that distance.
      #
      # @param height [Numeric]
      def build_polygons(height)
        @height = height
        @reagents_polygons = []

        segments.each do |segment|
          @reagents_polygons.push(band_polygon(segment, height))
        end
      end

      # Rebuilds the reagents polygons, one per segment, from the given
      # polygons.
      #
      # For every segment, the vertices of each polygon accepted by
      # Segment#polygon_in_range are projected onto the two lines
      # perpendicular to the segment through its end points; the polygon
      # stored for the segment is the axis-aligned rectangle around those
      # projections and the segment end points, grown by a fixed margin.
      #
      # A segment without any polygon in range gets a default band of its
      # own. Only that segment is affected: rectangles already computed for
      # other segments are kept, so the list always holds exactly one polygon
      # per segment.
      #
      # @param polygons [Array] objects responding to +vertices+
      # @return [Numeric] the new height: the tallest rectangle, or a small
      #   default when no rectangle was built
      def build_polygons_on_polygons(polygons)
        rect_heights = []
        @reagents_polygons = []

        segments.each do |segment|
          covered = projected_points(segment, polygons)

          if covered.empty?
            fallback = band_polygon(segment, FALLBACK_BAND_HEIGHT)
            @reagents_polygons.push(fallback)
            next
          end

          xs = covered.map(&:x)
          ys = covered.map(&:y)
          xmin = xs.min - POLYGON_MARGIN
          xmax = xs.max + POLYGON_MARGIN
          ymin = ys.min - POLYGON_MARGIN
          ymax = ys.max + POLYGON_MARGIN

          corners = [
            Geometry::Point.new(xmin, ymin),
            Geometry::Point.new(xmin, ymax),
            Geometry::Point.new(xmax, ymax),
            Geometry::Point.new(xmax, ymin),
          ]
          rect_heights.push((ymax - ymin).abs)
          @reagents_polygons.push(Geometry::Polygon.new(corners))
        end

        @height = rect_heights.max || FALLBACK_HEIGHT
      end

      # @return [Boolean] true when +point+ and the head yield the same
      #   non-zero sign from Line#point_side on the head perpendicular line
      def product_side?(point)
        line = head_perpen_segment.to_line
        (line.point_side(@head) * line.point_side(point)).positive?
      end

      # @return [Boolean] true when +point+ and the tail yield the same
      #   non-zero sign from Line#point_side on the tail perpendicular line
      def reactant_side?(point)
        line = tail_perpen_segment.to_line
        (line.point_side(@tail) * line.point_side(point)).positive?
      end

      # Projects +point+ onto the line of each segment in turn.
      #
      # @return [Object, nil] the first projection that its segment reports
      #   as contained, or nil when there is none
      def contains_point?(point)
        segments.each do |segment|
          projected = segment.to_line.point_projection(point)
          return projected if segment.contains_point?(projected)
        end

        nil
      end

      # @return [Numeric, nil] smallest Segment#distance_to_boundingbox value
      #   over all segments, measured against the polygon's bounding box
      def min_distance_to_polygon(polygon)
        bbox = polygon.bounding_box
        segments.map { |segment| segment.distance_to_boundingbox(bbox) }.min
      end

      # @return [Numeric] length of the segment from +point+ to the head
      def dist_to_head(point)
        Geometry::Segment.new(point, @head).length
      end

      # @return [Numeric] length of the segment from +point+ to the tail
      def dist_to_tail(point)
        Geometry::Segment.new(point, @tail).length
      end

      # @return [Boolean] true when +poly.around_polygon?+ holds for at least
      #   one reagents polygon
      def polygon_around?(poly)
        @reagents_polygons.any? { |rpoly| poly.around_polygon?(rpoly) }
      end

      # @return [Boolean] true when +segment+ intersects every reagents
      #   polygon (also true when there are none)
      def all_intersects_with_segment?(segment)
        @reagents_polygons.all? do |rpoly|
          segment.intersects_with_polygon?(rpoly)
        end
      end

      # @return [Boolean] true when every segment of this arrow is parallel
      #   to every segment of +other+
      def parallel_to?(other)
        other_segments = other.segments
        segments.all? do |seg|
          other_segments.all? { |oseg| seg.parallel_to?(oseg) }
        end
      end

      # @return [Boolean] true when the polygon's center or any point of its
      #   bounding box satisfies #point_in_middle
      def poly_in_middle?(poly)
        # Build a new array so the bounding box's own point list is not
        # modified.
        candidates = poly.bounding_box.points + [poly.center]
        candidates.any? { |point| point_in_middle(point) }
      end

      # Projects +target_point+ onto each segment's line and asks
      # Segment#point_in_range whether the projection is in range.
      #
      # The third argument passed to +point_in_range+ is +true+ for the
      # segment starting at the tail, +false+ for the segment ending at the
      # head, and +nil+ for every other segment. With a single segment, the
      # tail rule wins.
      #
      # @return [Boolean]
      def point_in_middle(target_point)
        pts = points
        last_idx = pts.size - 1

        pts.each_cons(2).with_index(1).any? do |(previous, current), idx|
          segment = Geometry::Segment.new(current, previous)
          projected = segment.to_line.point_projection(target_point)

          from_head = if idx == 1 then true
                      elsif idx == last_idx then false
                      end
          segment.point_in_range(projected, MIDDLE_RATIO, from_head)
        end
      end

      # Builds a copy with a fresh temporary id.
      #
      # Tail, head, middle points and cross lines are duplicated; the
      # reagents polygon list is a shallow copy; +descriptions+ is shared with
      # the original. +text_arr+ is not transferred, so the copy keeps the
      # empty list assigned by #initialize.
      def clone
        cloned = self.class.new(@geometry)
        cloned.id = get_tempid

        cloned.tail = @tail.clone
        cloned.head = @head.clone
        cloned.middle_points = Marshal.load(Marshal.dump(@middle_points))

        cloned.descriptions = @descriptions
        cloned.cross = @cross
        cloned.cross_lines = Marshal.load(Marshal.dump(@cross_lines))
        cloned.reagents_polygons = @reagents_polygons.clone
        cloned.height = @height
        cloned.line_type = @line_type

        cloned
      end

      # @return [Object] temporary id supplied by the underlying geometry
      def get_tempid
        @geometry.get_tempid
      end

      def inspect
        "#<Arrow: id=#{id}, " \
          "reagents_polygon: #{reagents_polygons}," \
          "tail: #{tail}, " \
          "head: #{head}, " \
          "middle_points: #{middle_points}, " \
          "cross: #{cross}, " \
          "height: #{height}, " \
          "line_type: #{line_type}, " \
          "cross_lines: #{cross_lines}, " \
          "text_arr: #{text_arr} >"
      end

      private

      # Point directly before the head on the polyline.
      def point_before_head
        @middle_points.empty? ? @tail : @middle_points.last
      end

      # Point directly after the tail on the polyline.
      def point_after_tail
        @middle_points.empty? ? @head : @middle_points.first
      end

      # Polygon spanned by the segment's head and tail perpendicular points
      # at distance +height+.
      def band_polygon(segment, height)
        p1, p2 = segment.head_perpen_points_dist(height)
        p3, p4 = segment.tail_perpen_points_dist(height)

        Geometry::Polygon.new([p1, p2, p4, p3])
      end

      # Projections of the vertices of every in-range polygon onto the two
      # perpendicular lines at the segment's end points, plus the end points
      # themselves (added once per in-range polygon). Empty when no polygon
      # is in range.
      def projected_points(segment, polygons)
        first_point = segment.point1
        second_point = segment.point2
        line = segment.to_line
        first_perpen = line.perpen_line_via_point(first_point)
        second_perpen = line.perpen_line_via_point(second_point)

        polygons.each_with_object([]) do |poly, acc|
          next unless segment.polygon_in_range(poly)

          poly.vertices.each do |vertex|
            acc.push(first_perpen.point_projection(vertex))
            acc.push(second_perpen.point_projection(vertex))
          end
          acc.push(first_point, second_point)
        end
      end
    end
  end
end
@@ -0,0 +1,134 @@
1
+ # frozen_string_literal: true
2
+
3
module ChemScanner
  module Interpreter
    # Atom class.
    #
    # Mirrors one reader node and registers the matching atom in the shared
    # RDKit read/write molecule. The RDKit atom is looked up again through a
    # bookmark keyed by the node id.
    class Atom
      attr_accessor :is_alias, :alias_text, :charge
      attr_reader :type, :ext_type, :warning, :warning_data, :point, :is_polymer

      # Copies the node attributes; nothing is added to the molecule until
      # #process is called.
      #
      # @param node [Object] reader node supplying id, type, ext_type, atnum,
      #   num_hydrogens, charge, iso, x, y, point, is_alias, alias_text,
      #   warning, warning_data and is_polymer
      # @param rw_mol [Object] RDKit read/write molecule the atom belongs to
      def initialize(node, rw_mol)
        @rw_mol = rw_mol

        @node = node

        @type = node.type
        @ext_type = node.ext_type
        @atnum = node.atnum
        @num_hydrogens = node.num_hydrogens
        @charge = node.charge
        @iso = node.iso
        @x = node.x || 0
        @y = node.y || 0
        @point = node.point

        @is_alias = node.is_alias
        # to_s keeps a node without alias text from raising on nil.
        @alias_text = node.alias_text.to_s.strip
        @warning = node.warning
        @warning_data = node.warning_data

        @is_polymer = node.is_polymer
      end

      # Adds the atom to the molecule, bookmarks it under the node id, then
      # applies hydrogens, charge, isotope, 2D position and alias handling.
      def process
        # Set default to Carbon
        @atnum = 6 if @atnum.negative?
        @rw_mol.add_atom(RDKitChem::Atom.new(@atnum), false)
        rd_atom = @rw_mol.get_last_atom
        @rw_mol.set_atom_bookmark(rd_atom, @node.id)

        # A negative hydrogen count leaves the explicit count untouched.
        rd_atom.set_num_explicit_hs(@num_hydrogens) if @num_hydrogens >= 0
        rd_atom.set_formal_charge(@charge)
        rd_atom.set_isotope(@iso)
        conformer = @rw_mol.get_conformer(0)
        position = RDKitChem::Point3D.new(@x, @y, 0)
        conformer.set_atom_pos(rd_atom.get_idx, position)

        process_alias
      end

      # @return [Object] the RDKit atom bookmarked under the node id
      def get_rd_atom
        @rw_mol.get_atom_with_bookmark(@node.id)
      end

      # @return [Integer] index reported by the RDKit atom
      def get_idx
        get_rd_atom.get_idx
      end

      # @return [Object] id of the underlying node
      def id
        @node.id
      end

      def inspect
        "#<Atom: id=#{@node.id}, " \
          "type: #{@type}, " \
          "external_type: #{@ext_type}, " \
          "atnum: #{@atnum}, " \
          "num_hydrogens: #{@num_hydrogens}, " \
          "charge: #{charge}, " \
          "iso: #{@iso}, " \
          "x: #{@x}, " \
          "y: #{@y}, " \
          "is_alias: #{is_alias}, " \
          "is_polymer: #{is_polymer}, " \
          "alias_text: #{alias_text}, " \
          "warning_data: #{@warning_data}, " \
          "warning: #{@warning} >"
      end

      # Clones the node, wraps it in a new Atom on the same molecule and
      # processes it (which adds another atom to that molecule).
      def clone
        copy = self.class.new(@node.clone, @rw_mol)
        copy.process

        copy
      end

      # Stores the 2D coordinates and writes them to conformer 0.
      def set_2d(coord_x, coord_y)
        @x = coord_x
        @y = coord_y

        conformer = @rw_mol.get_conformer(0)
        conformer.set_atom_pos(get_idx, RDKitChem::Point3D.new(@x, @y, 0))
      end

      # Stores the charge and forwards it to the RDKit atom.
      def set_formal_charge(charge)
        @charge = charge
        get_rd_atom.set_formal_charge(charge)
      end

      # Marks the atom as a polymer; polymer atoms are also flagged as alias.
      def set_polymer
        @is_alias = true
        @is_polymer = true
      end

      private

      # Decides whether the atom is an alias and, if so, sets the atomic
      # number of the RDKit atom to 0.
      def process_alias
        interpreter = ChemScanner::Interpreter

        # Alias text listed in ALIAS_GROUP: force type 5 and clear warnings.
        if interpreter::ALIAS_GROUP.include?(@alias_text)
          @type = 5
          @is_alias = true
          @warning = false
          @warning_data = ""
        end

        @is_alias ||= !@alias_text.empty? &&
                      interpreter.rgroup_atom?(@alias_text) &&
                      @type >= 0

        # Polymer handling
        set_polymer if @ext_type == 3

        return unless @is_alias

        get_rd_atom.set_atomic_num(0)
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
module ChemScanner
  module Interpreter
    # Fragment class.
    #
    # Thin wrapper around a reader fragment; most accessors are forwarded
    # to the wrapped object.
    class Fragment
      extend Forwardable

      def_delegators :@fragment, :id, :parser, :parser_type,
                     :polygon, :polygon=, :boxed, :boxed=,
                     :node_map, :node_map=, :bond_map, :bond_map=, :graphic_map

      # @param chemdraw_fragment [Object] the reader fragment to wrap
      def initialize(chemdraw_fragment)
        @fragment = chemdraw_fragment
      end

      # Merges +other+ into this fragment: the boxed flags are OR-ed, the
      # node and bond maps are merged in place (entries from +other+ win on
      # key collision), and the polygon is rebuilt.
      #
      # @param other [#boxed, #node_map, #bond_map]
      def add(other)
        @fragment.boxed |= other.boxed

        @fragment.node_map.merge!(other.node_map)
        @fragment.bond_map.merge!(other.bond_map)

        @fragment.rebuild_polygon
      end

      # Clones the wrapped fragment, gives the clone a new id and wraps it.
      # The boxed flag and the node/bond map objects of the original are
      # assigned to the copy as-is (the maps are not duplicated here).
      def clone
        cloned_fragment = @fragment.clone
        cloned_fragment.set_new_id

        cloned = self.class.new(cloned_fragment)
        cloned.boxed = @fragment.boxed
        cloned.node_map = @fragment.node_map
        cloned.bond_map = @fragment.bond_map

        cloned
      end

      # Overwrites the id of the wrapped fragment.
      def set_id(new_id)
        @fragment.id = new_id
      end

      # Tells whether all nodes of the fragment, taken in node map order,
      # lie on one line.
      #
      # Every window of three consecutive nodes is checked, so the last node
      # takes part in the test as well.
      #
      # @return [Boolean] false when the fragment has fewer than three nodes
      def line?
        node_points = @fragment.node_map.values.map(&:point)
        return false if node_points.size < 3

        node_points.each_cons(3).all? do |first, middle, last|
          leading = Geometry::Segment.new(first, middle)
          trailing = Geometry::Segment.new(middle, last)
          leading.lies_on_one_line_with?(trailing)
        end
      end
    end
  end
end