chem_scanner 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +13 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yml +604 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +5 -0
  8. data/CODE_OF_CONDUCT.md +74 -0
  9. data/Gemfile +20 -0
  10. data/LICENSE.txt +661 -0
  11. data/README.md +177 -0
  12. data/Rakefile +8 -0
  13. data/bin/console +14 -0
  14. data/bin/setup +8 -0
  15. data/chem_scanner.gemspec +43 -0
  16. data/lib/chem_scanner.rb +79 -0
  17. data/lib/chem_scanner/cdx.rb +67 -0
  18. data/lib/chem_scanner/cdxml.rb +72 -0
  19. data/lib/chem_scanner/chem_draw/cdx_reader.rb +101 -0
  20. data/lib/chem_scanner/chem_draw/node/base_node.rb +123 -0
  21. data/lib/chem_scanner/chem_draw/node/base_value.rb +257 -0
  22. data/lib/chem_scanner/chem_draw/node/bond.rb +100 -0
  23. data/lib/chem_scanner/chem_draw/node/bracket_attachment.rb +17 -0
  24. data/lib/chem_scanner/chem_draw/node/bracket_group.rb +32 -0
  25. data/lib/chem_scanner/chem_draw/node/chem_geometry.rb +58 -0
  26. data/lib/chem_scanner/chem_draw/node/color_table.rb +46 -0
  27. data/lib/chem_scanner/chem_draw/node/font_table.rb +54 -0
  28. data/lib/chem_scanner/chem_draw/node/fragment.rb +149 -0
  29. data/lib/chem_scanner/chem_draw/node/fragment_node.rb +145 -0
  30. data/lib/chem_scanner/chem_draw/node/graphic.rb +94 -0
  31. data/lib/chem_scanner/chem_draw/node/text.rb +242 -0
  32. data/lib/chem_scanner/chem_draw/parser.rb +214 -0
  33. data/lib/chem_scanner/chem_draw/yaml/cdx_objects.yaml +32 -0
  34. data/lib/chem_scanner/chem_draw/yaml/cdx_props.yaml +263 -0
  35. data/lib/chem_scanner/chem_draw/yaml/cdxml_objects.yaml +36 -0
  36. data/lib/chem_scanner/chem_draw/yaml/cdxml_props.yaml +263 -0
  37. data/lib/chem_scanner/chem_draw/yaml/props_data_type.yaml +263 -0
  38. data/lib/chem_scanner/configuration/abbreviation.rb +76 -0
  39. data/lib/chem_scanner/configuration/superatom.rb +76 -0
  40. data/lib/chem_scanner/configuration/superatom.txt +2874 -0
  41. data/lib/chem_scanner/configuration/util.rb +40 -0
  42. data/lib/chem_scanner/configuration/yaml/abbreviations.yaml +6399 -0
  43. data/lib/chem_scanner/configuration/yaml/elements.yaml +115 -0
  44. data/lib/chem_scanner/configuration/yaml/solvents.yaml +16 -0
  45. data/lib/chem_scanner/doc.rb +56 -0
  46. data/lib/chem_scanner/docx.rb +86 -0
  47. data/lib/chem_scanner/export/cml.rb +176 -0
  48. data/lib/chem_scanner/extension/element_map.rb +9 -0
  49. data/lib/chem_scanner/extension/geometry/bounding_box.rb +84 -0
  50. data/lib/chem_scanner/extension/geometry/line.rb +123 -0
  51. data/lib/chem_scanner/extension/geometry/point.rb +18 -0
  52. data/lib/chem_scanner/extension/geometry/polygon.rb +115 -0
  53. data/lib/chem_scanner/extension/geometry/segment.rb +196 -0
  54. data/lib/chem_scanner/extension/passthrough.rb +7 -0
  55. data/lib/chem_scanner/interpreter/element/arrow.rb +298 -0
  56. data/lib/chem_scanner/interpreter/element/atom.rb +134 -0
  57. data/lib/chem_scanner/interpreter/element/fragment.rb +59 -0
  58. data/lib/chem_scanner/interpreter/element/molecule.rb +473 -0
  59. data/lib/chem_scanner/interpreter/element/molecule_group.rb +34 -0
  60. data/lib/chem_scanner/interpreter/element/reaction.rb +186 -0
  61. data/lib/chem_scanner/interpreter/element/reaction_step.rb +39 -0
  62. data/lib/chem_scanner/interpreter/formula_to_mol.rb +75 -0
  63. data/lib/chem_scanner/interpreter/post_process/assemble.rb +38 -0
  64. data/lib/chem_scanner/interpreter/post_process/label_by_molecule.rb +37 -0
  65. data/lib/chem_scanner/interpreter/post_process/reaction_info.rb +225 -0
  66. data/lib/chem_scanner/interpreter/post_process/reaction_step.rb +95 -0
  67. data/lib/chem_scanner/interpreter/post_process/reagent_label.rb +46 -0
  68. data/lib/chem_scanner/interpreter/post_process/text_as_molecule.rb +52 -0
  69. data/lib/chem_scanner/interpreter/post_process/text_label.rb +40 -0
  70. data/lib/chem_scanner/interpreter/pre_process/arrow.rb +197 -0
  71. data/lib/chem_scanner/interpreter/pre_process/graphic.rb +41 -0
  72. data/lib/chem_scanner/interpreter/pre_process/molecule.rb +150 -0
  73. data/lib/chem_scanner/interpreter/reaction_detection/assign_to_reaction.rb +129 -0
  74. data/lib/chem_scanner/interpreter/reaction_detection/duplicate_reagents.rb +50 -0
  75. data/lib/chem_scanner/interpreter/reaction_detection/molecule_group.rb +55 -0
  76. data/lib/chem_scanner/interpreter/reaction_detection/multi_line_chain_reaction.rb +85 -0
  77. data/lib/chem_scanner/interpreter/reaction_detection/remove_separated_mol.rb +115 -0
  78. data/lib/chem_scanner/interpreter/reaction_detection/text_assignment.rb +166 -0
  79. data/lib/chem_scanner/interpreter/scheme.rb +173 -0
  80. data/lib/chem_scanner/interpreter/scheme_base.rb +64 -0
  81. data/lib/chem_scanner/interpreter/text_group/bold_groups.rb +183 -0
  82. data/lib/chem_scanner/interpreter/text_group/molecule_text_group.rb +138 -0
  83. data/lib/chem_scanner/interpreter/text_group/reaction_text_groups.rb +221 -0
  84. data/lib/chem_scanner/interpreter/text_group/retrieve_alias_info.rb +41 -0
  85. data/lib/chem_scanner/interpreter/text_group/retrieve_n_atoms.rb +106 -0
  86. data/lib/chem_scanner/interpreter/text_group/text_group_interpreter.rb +92 -0
  87. data/lib/chem_scanner/perkin_eln.rb +287 -0
  88. data/lib/chem_scanner/version.rb +5 -0
  89. data/lib/rubygems_plugin.rb +5 -0
  90. metadata +244 -0
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module ChemScanner
  # Hash subclass with a helper for dropping entries by key.
  class ElementMap < Hash
    # Returns the entries whose key matches none of the given ids.
    #
    # Accepts any number of ids so the override stays call-compatible with
    # Hash#except (Ruby >= 3.0), while existing single-id callers keep working.
    # The receiver is not modified.
    #
    # @param ids [Array<Object>] keys to leave out
    # @return [Hash] the remaining key/value pairs
    def except(*ids)
      reject { |key, _| ids.any? { |id| key == id } }
    end
  end
end
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "geometry"
4
+
5
module ChemScanner
  # Extension module
  module Extension
    include Geometry

    # Monkey patch BoundingBox class from ruby-geometry
    refine Geometry::BoundingBox do
      # @return [Point] corner taking its x from +leftbottom+ and its y from +righttop+
      def lefttop
        left = leftbottom.x
        top = righttop.y
        Point.new(left, top)
      end

      # @return [Point] corner taking its x from +righttop+ and its y from +leftbottom+
      def rightbottom
        right = righttop.x
        bottom = leftbottom.y
        Point.new(right, bottom)
      end

      # @return [Point] midpoint between +leftbottom+ and +righttop+
      def center
        low = leftbottom
        high = righttop
        mid_x = (low.x + high.x) / 2
        mid_y = (low.y + high.y) / 2

        Point.new(mid_x, mid_y)
      end

      # @return [Array<Segment>] the four sides, in the order left, bottom, top, right
      def edges
        corner_pairs = [
          [leftbottom, lefttop],
          [leftbottom, rightbottom],
          [lefttop, righttop],
          [rightbottom, righttop],
        ]
        corner_pairs.map { |from, to| Segment.new(from, to) }
      end

      # @return [Array<Point>] the four corners: leftbottom, lefttop, righttop, rightbottom
      def points
        [leftbottom, lefttop, righttop, rightbottom]
      end

      # Smallest +euclid_distance_to+ value over every pairing of one of this
      # box's edges with one of the other box's edges.
      #
      # @param other [#edges] the box to measure against
      def euclid_distance_to(other)
        distances = edges.flat_map do |edge|
          other.edges.map { |other_edge| edge.euclid_distance_to(other_edge) }
        end
        distances.min
      end

      # Smallest +distance_to+ value from any edge of the box to the point.
      def distance_to_point(point)
        distances = edges.map { |edge| edge.distance_to(point) }
        distances.min
      end

      # Distance from the point to the box *center* (not to its border).
      def euclid_distance_to_point(point)
        point.distance_to(center)
      end

      # Product of the lengths of the left and the top side.
      def area
        left_side = Segment.new(leftbottom, lefttop)
        top_side = Segment.new(lefttop, righttop)
        left_side.length * top_side.length
      end

      # @return [String] the corners rendered as "POLYGON((x, y),(x, y),...)"
      def to_gis
        corners = points.map { |corner| "(#{corner.x}, #{corner.y})" }
        "POLYGON(#{corners.join(',')})"
      end

      # @return [Boolean] whether the point lies inside the box, borders included
      def contains_point?(point)
        return false unless point.x <= righttop.x && point.x >= leftbottom.x

        point.y <= righttop.y && point.y >= leftbottom.y
      end
    end
  end
end
@@ -0,0 +1,123 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "geometry"
4
+
5
module ChemScanner
  # Monkey patch Line class from ruby-geometry
  module Extension
    include Geometry

    # Monkey patch Line class
    refine Geometry::Line do
      # Angle of the line in degrees.
      #
      # Vertical lines yield 90 and horizontal lines 0. Otherwise the
      # arctangent of a ratio of the coordinate deltas is taken (which ratio
      # depends on whether point1 has the larger y), a non-positive arc is
      # shifted by 2*PI, and the result is converted from radians to degrees.
      # NOTE(review): the else-branch uses delta_x / delta_y - confirm intended.
      def angle
        return 90 if vertical?
        return 0 if horizontal?

        p1, p2 = [point1, point2].sort_by(&:x)
        delta_x = p1.x - p2.x
        delta_y = p1.y - p2.y

        arc = if point1.y > point2.y # inverted axis/origin
                Math.atan(delta_y / delta_x)
              else
                Math.atan(delta_x / delta_y)
              end
        (arc.positive? ? arc : (2 * Math::PI + arc)) * 360 / (2 * Math::PI)
      end

      # @return [Segment] segment between the line's two defining points
      def to_segment
        Segment.new(point1, point2)
      end

      # Coefficients of the line written as Ax + By = C.
      #
      # @return [Array] [a, b, c]
      def abc_coeff
        a = point2.y - point1.y
        b = point1.x - point2.x
        c = a * point1.x + b * point1.y

        [a, b, c]
      end

      # Get the point belonging to the line for a given x or y.
      #
      # @param value [Numeric] the known coordinate
      # @param is_y [Boolean] true when +value+ is a y coordinate,
      #   false (default) when it is an x coordinate
      # @return [Point] the other coordinate is nil when it cannot be derived
      #   (see #x_from_y / #y_from_x)
      def get_point(value, is_y = false)
        if is_y
          x = x_from_y(value)
          # Return here: without it the result was discarded and the method
          # always fell through to the x-based branch below.
          return Point.new(x, Float(value))
        end

        y = y_from_x(value)
        Point.new(Float(value), y)
      end

      # x coordinate of the line at the given y.
      #
      # @return [Float, nil] nil when both defining points share the same y
      def x_from_y(point_y)
        b = point1.y - point2.y
        return nil if b.zero?

        Float(point1.x - ((point1.y - point_y) * (point1.x - point2.x) / b))
      end

      # y coordinate of the line at the given x.
      #
      # @return [Float, nil] nil when both defining points share the same x
      def y_from_x(point_x)
        b = point1.x - point2.x
        return nil if b.zero?

        Float(point1.y - ((point1.x - point_x) * (point1.y - point2.y)) / b)
      end

      # Delegates to the segment's +intersects_with_line?+.
      def intersects_with_segment?(segment)
        segment.intersects_with_line?(self)
      end

      # @return [Boolean] whether the line intersects at least one polygon edge
      def intersects_with_polygon?(polygon)
        polygon.edges.any? { |edge| intersects_with_segment?(edge) }
      end

      # Delegates to the polygon's +intersection_points_with_line+.
      def intersection_points_with_polygon(polygon)
        polygon.intersection_points_with_line(self)
      end

      # Intersection of this line with another one.
      #
      # @return [Point, nil] a single point, or nil when the lines are parallel
      def intersection_points_with(line)
        return nil if parallel_to?(line)

        # Ax + By = C for both lines, solved via the determinant
        a1, b1, c1 = abc_coeff
        a2, b2, c2 = line.abc_coeff

        determinant = a1 * b2 - a2 * b1

        x = (b2 * c1 - b1 * c2) / determinant
        y = (a1 * c2 - a2 * c1) / determinant

        Point.new(x, y)
      end

      # Cross product of the line's vector (point1 -> point2) with the vector
      # point1 -> point; its sign tells on which side of the line the point is.
      #
      # positive: same side with point2
      # negative: same side with point1
      def point_side(point)
        v = Segment.new(point1, point).to_vector
        to_segment.to_vector.cross_product(v)
      end

      # Line through +point+ perpendicular to this line.
      #
      # The second defining point is placed 5 units away along x (or along y
      # when this line is horizontal).
      def perpen_line_via_point(point)
        if vertical?
          Line.new(point, Point.new(point.x + 5, point.y))
        elsif horizontal?
          Line.new(point, Point.new(point.x, point.y + 5))
        else
          m2 = (-1 / slope)
          x2 = point.x + 5
          y2 = m2 * x2 + (point.y - m2 * point.x)

          Line.new(point, Point.new(x2, y2))
        end
      end

      # Intersection of this line with its perpendicular through +point+.
      def point_projection(point)
        pline = perpen_line_via_point(point)
        pline.intersection_points_with(self)
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "geometry"
4
+
5
module ChemScanner
  # Monkey patch Point class from ruby-geometry
  module Extension
    refine Geometry::Point do
      # Asks the polygon for its +euclid_distance_to_point+ value for this point.
      #
      # @param polygon [#euclid_distance_to_point]
      def euclid_distance_to_polygon(polygon)
        origin = self
        polygon.euclid_distance_to_point(origin)
      end

      # Distance between this point and +other+, computed by Geometry.distance.
      def distance_to(other)
        origin = self
        Geometry.distance(origin, other)
      end
    end
  end
end
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "geometry"
4
+
5
module ChemScanner
  include Geometry

  # Extension module
  module Extension
    # Monkey patch Polygon class from ruby-geometry
    refine Geometry::Polygon do
      # @return [Point] center of the polygon's bounding box
      def center
        lb = bounding_box.leftbottom
        rt = bounding_box.righttop

        Point.new((lb.x + rt.x) / 2, (lb.y + rt.y) / 2)
      end

      # Distance between the bounding box's leftbottom and lefttop corners.
      def height
        lb = bounding_box.leftbottom
        lb.distance_to(bounding_box.lefttop)
      end

      # Distance between the bounding box's leftbottom and rightbottom corners.
      def width
        lb = bounding_box.leftbottom
        lb.distance_to(bounding_box.rightbottom)
      end

      # @return [Boolean] whether any edge of this polygon intersects any edge of +other+
      def intersects_with_polygon?(other)
        edges.any? do |e1|
          other.edges.any? { |e2| e1.intersects_with?(e2) }
        end
      end

      # @return [Boolean] whether every vertex of +other+ is contained in this polygon
      def contains_polygon?(other)
        other.vertices.all? { |vertex| contains?(vertex) }
      end

      # @return [Boolean] true when either polygon contains the other one, or
      #   contains the other one's center
      def around_polygon?(other)
        (
          contains_polygon?(other) || other.contains_polygon?(self) ||
          contains?(other.center) || other.contains?(center)
        )
      end

      # Rectangle covering the bounding boxes of this polygon and +another+.
      #
      # @return [Polygon] vertices ordered leftbottom, lefttop, righttop, rightbottom
      def merge_polygon(another)
        lb = bounding_box.leftbottom
        rt = bounding_box.righttop

        alb = another.bounding_box.leftbottom
        art = another.bounding_box.righttop

        left = [lb.x, alb.x].min
        bottom = [lb.y, alb.y].min
        right = [rt.x, art.x].max
        top = [rt.y, art.y].max

        p1 = Point.new(left, bottom)
        p2 = Point.new(left, top)
        p3 = Point.new(right, top)
        p4 = Point.new(right, bottom)

        Polygon.new([p1, p2, p3, p4])
      end

      # Smallest +distance_to+ value from any edge to the point.
      #
      # Starts from Float::INFINITY instead of a fixed large number so that
      # real distances of any magnitude are reported; a polygon without edges
      # yields Float::INFINITY.
      def distance_to_point(point)
        edges.reduce(Float::INFINITY) do |closest, edge|
          dist = edge.distance_to(point)
          dist < closest ? dist : closest
        end
      end

      # Smallest +euclid_distance_to_point+ value from any edge to the point.
      #
      # Uses the same Float::INFINITY starting value as #distance_to_point.
      def euclid_distance_to_point(point)
        edges.reduce(Float::INFINITY) do |closest, edge|
          dist = edge.euclid_distance_to_point(point)
          dist < closest ? dist : closest
        end
      end

      # Points where the given line crosses the polygon's edges.
      #
      # @param line [Line]
      # @return [Array<Point>] one entry per edge that is actually hit
      def intersection_points_with_line(line)
        edges.each_with_object([]) do |edge, points|
          edge_line = edge.to_line
          inter_x = edge_line.intersect_x(line)
          next if inter_x.nil?

          inter_y = line.y_from_x(inter_x) || edge_line.y_from_x(inter_x)
          # Neither line can provide a y for this x: skip the edge, mirroring
          # the guard in Segment#intersects_with_line?.
          next if inter_y.nil?

          point = Point.new(inter_x, inter_y)
          points.push(point) if edge.contains_point?(point)
        end
      end
    end
  end
end
@@ -0,0 +1,196 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "geometry"
4
+
5
module ChemScanner
  include Geometry

  # Monkey patch ruby-geometry class
  module Extension
    # Monkey patch Segment class
    refine Geometry::Segment do
      # @return [Array<Point>] both end points, point1 first
      def points
        [point1, point2]
      end

      # Whether the point lies on the segment: the distances from the point to
      # both ends must add up to the segment length (compared at 2 decimals).
      def contains_point?(point)
        l1 = Geometry.distance(point1, point)
        l2 = Geometry.distance(point, point2)

        length.round(2) == (l1 + l2).round(2)
      end

      # @return [Boolean] whether both end points of +other+ lie on this segment
      def contains_segment?(other)
        contains_point?(other.point1) && contains_point?(other.point2)
      end

      # @return [Point] midpoint of the segment
      def center
        Point.new((point1.x + point2.x) / 2, (point1.y + point2.y) / 2)
      end

      # @return [Line] line through both end points
      def to_line
        Line.new(point1, point2)
      end

      # @return [Boolean] true when more than one polygon edge intersects this segment
      def intersects_with_polygon?(polygon)
        polygon.edges.count { |edge| edge.intersects_with?(self) } > 1
      end

      # Whether the given line crosses this segment.
      #
      # The crossing of the line with the segment's supporting line is
      # computed first and then checked against the segment itself.
      def intersects_with_line?(line)
        sline = to_line
        inter_x = sline.intersect_x(line)
        return false if inter_x.nil?

        inter_y = line.y_from_x(inter_x) || sline.y_from_x(inter_x)
        return false if inter_y.nil?

        contains_point?(Point.new(inter_x, inter_y))
      end

      # Two points at +distance+ from point2, one on each side of the segment,
      # along the direction perpendicular to it.
      #
      # @return [Array<Point>]
      def head_perpen_points_dist(distance)
        dx = point2.x - point1.x
        dy = point2.y - point1.y
        dist = Math.sqrt((dx * dx) + (dy * dy))
        # normalise to a unit direction vector
        dx /= dist
        dy /= dist
        x3 = point2.x + (distance * dy)
        y3 = point2.y - (distance * dx)
        x4 = point2.x - (distance * dy)
        y4 = point2.y + (distance * dx)
        [Point.new(x3, y3), Point.new(x4, y4)]
      end

      # Same as #head_perpen_points_dist but around point1.
      #
      # The direction vector is reversed here and the two results are returned
      # in swapped order, so each index lies on the same side of the segment
      # as the matching entry of #head_perpen_points_dist.
      #
      # @return [Array<Point>]
      def tail_perpen_points_dist(distance)
        dx = point1.x - point2.x
        dy = point1.y - point2.y
        dist = Math.sqrt((dx * dx) + (dy * dy))
        # normalise to a unit direction vector
        dx /= dist
        dy /= dist
        x3 = point1.x + (distance * dy)
        y3 = point1.y - (distance * dx)
        x4 = point1.x - (distance * dy)
        y4 = point1.y + (distance * dx)
        [Point.new(x4, y4), Point.new(x3, y3)]
      end

      # +point+ translated by the segment's vector (point1 -> point2).
      #
      # @return [Point]
      def parallel_at(point)
        x4 = point.x + point2.x - point1.x
        y4 = point.y + point2.y - point1.y
        Point.new(x4, y4)
      end

      # Smallest distance between an end point of this segment and an end
      # point of +other+.
      def euclid_distance_to(other)
        l1 = point1.distance_to(other.point1)
        l2 = point2.distance_to(other.point1)
        l3 = point1.distance_to(other.point2)
        l4 = point2.distance_to(other.point2)

        [l1, l2, l3, l4].min
      end

      # Distance from +point+ to the nearer end point of the segment.
      def euclid_distance_to_point(point)
        l1 = point1.distance_to(point)
        l2 = point2.distance_to(point)

        [l1, l2].min
      end

      # Smallest #euclid_distance_to value against the edges of the polygon's
      # bounding box.
      def euclid_distance_to_polygon(poly)
        distances = poly.bounding_box.edges.map { |edge| euclid_distance_to(edge) }
        distances.min
      end

      # Smallest #distance_to_segment value against the edges of +bbox+.
      def distance_to_boundingbox(bbox)
        distances = bbox.edges.map { |edge| distance_to_segment(edge) }
        distances.min
      end

      # Smallest +distance_to+ value between either segment and the end points
      # of the other one.
      def distance_to_segment(other)
        [
          other.distance_to(point1),
          other.distance_to(point2),
          distance_to(other.point1),
          distance_to(other.point2),
        ].min
      end

      # Segment from +point+ to its perpendicular projection onto the
      # supporting line of this segment.
      #
      # @return [Segment, nil] nil when no projection point is found
      def perpen_segment_via_point(point)
        sline = to_line
        pline = sline.perpen_line_via_point(point)

        inter_point = pline.intersection_points_with(sline)
        return nil if inter_point.nil?

        Segment.new(point, inter_point)
      end

      # Whether +point+ lies on the segment within a relative range.
      #
      # @param point [Point]
      # @param range [Numeric] upper bound for distance / segment length
      # @param from_head [Boolean, nil] true measures from point1, false from
      #   point2, nil uses the larger of both distances; any other value
      #   makes the method return false
      # @return [Boolean]
      def point_in_range(point, range, from_head = nil)
        return false unless contains_point?(point)

        dist1 = point1.distance_to(point)
        dist2 = point2.distance_to(point)

        dist = case from_head
               when true then dist1
               when false then dist2
               when nil then [dist1, dist2].max
               else return false
               end

        (dist / length) < range
      end

      # @return [Boolean] whether the projection of any polygon vertex onto
      #   the supporting line falls on the segment
      def polygon_in_range(polygon)
        line = to_line

        polygon.vertices.any? do |vertex|
          contains_point?(line.point_projection(vertex))
        end
      end

      # +num+ evenly spaced points strictly between the two end points.
      #
      # The points are walked from the end point with the smaller x (smaller y
      # on a tie) towards the other one, so they stay on the segment whatever
      # its direction. Previously the walk started at (min x, min y) with
      # positive steps only, which left the segment whenever it ran from
      # top-left to bottom-right.
      #
      # @param num [Integer] number of points
      # @return [Array<Point>] empty when +num+ is smaller than 2
      def slice_to_many_points(num)
        return [] if num < 2

        start, finish = [point1, point2].sort_by { |pt| [pt.x, pt.y] }
        step_x = (finish.x - start.x) / (num + 1)
        step_y = (finish.y - start.y) / (num + 1)

        x = start.x
        y = start.y
        Array.new(num) do
          x += step_x
          y += step_y
          Point.new(x, y)
        end
      end

      # @return [String] the end points rendered as "SEGMENT((x, y), (x, y))"
      def to_gis
        "SEGMENT((#{point1.x}, #{point1.y}), (#{point2.x}, #{point2.y}))"
      end
    end
  end
end