chem_scanner 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +13 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yml +604 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +5 -0
  8. data/CODE_OF_CONDUCT.md +74 -0
  9. data/Gemfile +20 -0
  10. data/LICENSE.txt +661 -0
  11. data/README.md +177 -0
  12. data/Rakefile +8 -0
  13. data/bin/console +14 -0
  14. data/bin/setup +8 -0
  15. data/chem_scanner.gemspec +43 -0
  16. data/lib/chem_scanner.rb +79 -0
  17. data/lib/chem_scanner/cdx.rb +67 -0
  18. data/lib/chem_scanner/cdxml.rb +72 -0
  19. data/lib/chem_scanner/chem_draw/cdx_reader.rb +101 -0
  20. data/lib/chem_scanner/chem_draw/node/base_node.rb +123 -0
  21. data/lib/chem_scanner/chem_draw/node/base_value.rb +257 -0
  22. data/lib/chem_scanner/chem_draw/node/bond.rb +100 -0
  23. data/lib/chem_scanner/chem_draw/node/bracket_attachment.rb +17 -0
  24. data/lib/chem_scanner/chem_draw/node/bracket_group.rb +32 -0
  25. data/lib/chem_scanner/chem_draw/node/chem_geometry.rb +58 -0
  26. data/lib/chem_scanner/chem_draw/node/color_table.rb +46 -0
  27. data/lib/chem_scanner/chem_draw/node/font_table.rb +54 -0
  28. data/lib/chem_scanner/chem_draw/node/fragment.rb +149 -0
  29. data/lib/chem_scanner/chem_draw/node/fragment_node.rb +145 -0
  30. data/lib/chem_scanner/chem_draw/node/graphic.rb +94 -0
  31. data/lib/chem_scanner/chem_draw/node/text.rb +242 -0
  32. data/lib/chem_scanner/chem_draw/parser.rb +214 -0
  33. data/lib/chem_scanner/chem_draw/yaml/cdx_objects.yaml +32 -0
  34. data/lib/chem_scanner/chem_draw/yaml/cdx_props.yaml +263 -0
  35. data/lib/chem_scanner/chem_draw/yaml/cdxml_objects.yaml +36 -0
  36. data/lib/chem_scanner/chem_draw/yaml/cdxml_props.yaml +263 -0
  37. data/lib/chem_scanner/chem_draw/yaml/props_data_type.yaml +263 -0
  38. data/lib/chem_scanner/configuration/abbreviation.rb +76 -0
  39. data/lib/chem_scanner/configuration/superatom.rb +76 -0
  40. data/lib/chem_scanner/configuration/superatom.txt +2874 -0
  41. data/lib/chem_scanner/configuration/util.rb +40 -0
  42. data/lib/chem_scanner/configuration/yaml/abbreviations.yaml +6399 -0
  43. data/lib/chem_scanner/configuration/yaml/elements.yaml +115 -0
  44. data/lib/chem_scanner/configuration/yaml/solvents.yaml +16 -0
  45. data/lib/chem_scanner/doc.rb +56 -0
  46. data/lib/chem_scanner/docx.rb +86 -0
  47. data/lib/chem_scanner/export/cml.rb +176 -0
  48. data/lib/chem_scanner/extension/element_map.rb +9 -0
  49. data/lib/chem_scanner/extension/geometry/bounding_box.rb +84 -0
  50. data/lib/chem_scanner/extension/geometry/line.rb +123 -0
  51. data/lib/chem_scanner/extension/geometry/point.rb +18 -0
  52. data/lib/chem_scanner/extension/geometry/polygon.rb +115 -0
  53. data/lib/chem_scanner/extension/geometry/segment.rb +196 -0
  54. data/lib/chem_scanner/extension/passthrough.rb +7 -0
  55. data/lib/chem_scanner/interpreter/element/arrow.rb +298 -0
  56. data/lib/chem_scanner/interpreter/element/atom.rb +134 -0
  57. data/lib/chem_scanner/interpreter/element/fragment.rb +59 -0
  58. data/lib/chem_scanner/interpreter/element/molecule.rb +473 -0
  59. data/lib/chem_scanner/interpreter/element/molecule_group.rb +34 -0
  60. data/lib/chem_scanner/interpreter/element/reaction.rb +186 -0
  61. data/lib/chem_scanner/interpreter/element/reaction_step.rb +39 -0
  62. data/lib/chem_scanner/interpreter/formula_to_mol.rb +75 -0
  63. data/lib/chem_scanner/interpreter/post_process/assemble.rb +38 -0
  64. data/lib/chem_scanner/interpreter/post_process/label_by_molecule.rb +37 -0
  65. data/lib/chem_scanner/interpreter/post_process/reaction_info.rb +225 -0
  66. data/lib/chem_scanner/interpreter/post_process/reaction_step.rb +95 -0
  67. data/lib/chem_scanner/interpreter/post_process/reagent_label.rb +46 -0
  68. data/lib/chem_scanner/interpreter/post_process/text_as_molecule.rb +52 -0
  69. data/lib/chem_scanner/interpreter/post_process/text_label.rb +40 -0
  70. data/lib/chem_scanner/interpreter/pre_process/arrow.rb +197 -0
  71. data/lib/chem_scanner/interpreter/pre_process/graphic.rb +41 -0
  72. data/lib/chem_scanner/interpreter/pre_process/molecule.rb +150 -0
  73. data/lib/chem_scanner/interpreter/reaction_detection/assign_to_reaction.rb +129 -0
  74. data/lib/chem_scanner/interpreter/reaction_detection/duplicate_reagents.rb +50 -0
  75. data/lib/chem_scanner/interpreter/reaction_detection/molecule_group.rb +55 -0
  76. data/lib/chem_scanner/interpreter/reaction_detection/multi_line_chain_reaction.rb +85 -0
  77. data/lib/chem_scanner/interpreter/reaction_detection/remove_separated_mol.rb +115 -0
  78. data/lib/chem_scanner/interpreter/reaction_detection/text_assignment.rb +166 -0
  79. data/lib/chem_scanner/interpreter/scheme.rb +173 -0
  80. data/lib/chem_scanner/interpreter/scheme_base.rb +64 -0
  81. data/lib/chem_scanner/interpreter/text_group/bold_groups.rb +183 -0
  82. data/lib/chem_scanner/interpreter/text_group/molecule_text_group.rb +138 -0
  83. data/lib/chem_scanner/interpreter/text_group/reaction_text_groups.rb +221 -0
  84. data/lib/chem_scanner/interpreter/text_group/retrieve_alias_info.rb +41 -0
  85. data/lib/chem_scanner/interpreter/text_group/retrieve_n_atoms.rb +106 -0
  86. data/lib/chem_scanner/interpreter/text_group/text_group_interpreter.rb +92 -0
  87. data/lib/chem_scanner/perkin_eln.rb +287 -0
  88. data/lib/chem_scanner/version.rb +5 -0
  89. data/lib/rubygems_plugin.rb +5 -0
  90. metadata +244 -0
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module ChemScanner
  # Hash subclass with a helper for dropping entries by key.
  class ElementMap < Hash
    # Returns a copy of the map without the given key(s); the receiver is
    # left untouched.
    #
    # Takes any number of keys so this override stays call-compatible with
    # the variadic Hash#except that Ruby 3.0+ provides; the previous
    # single-argument form still works unchanged.
    #
    # @param ids [Array<Object>] keys to leave out
    # @return [Hash] the entries whose key is not listed in +ids+
    def except(*ids)
      reject { |key, _| ids.include?(key) }
    end
  end
end
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "geometry"
4
+
5
+ module ChemScanner
6
+ # Extension module
7
+ module Extension
8
+ include Geometry
9
+
10
+ # Monkey patch BoundingBox class from ruby-geometry
11
+ refine Geometry::BoundingBox do
12
# Top-left corner: x taken from +leftbottom+, y taken from +righttop+.
#
# @return [Point]
def lefttop
  left_x = leftbottom.x
  top_y = righttop.y
  Point.new(left_x, top_y)
end
15
+
16
# Bottom-right corner: x taken from +righttop+, y taken from +leftbottom+.
#
# @return [Point]
def rightbottom
  right_x = righttop.x
  bottom_y = leftbottom.y
  Point.new(right_x, bottom_y)
end
19
+
20
# Midpoint of the box, halfway between +leftbottom+ and +righttop+.
#
# Divides by 2.0 so the midpoint stays exact when the corner coordinates
# are Integers (Integer#/ would truncate, e.g. 3 / 2 => 1).
#
# @return [Point]
def center
  lb = leftbottom
  rt = righttop

  Point.new((lb.x + rt.x) / 2.0, (lb.y + rt.y) / 2.0)
end
26
+
27
# The four sides of the box as segments, in the order
# left, bottom, top, right.
#
# @return [Array<Segment>]
def edges
  left_side = Segment.new(leftbottom, lefttop)
  bottom_side = Segment.new(leftbottom, rightbottom)
  top_side = Segment.new(lefttop, righttop)
  right_side = Segment.new(rightbottom, righttop)

  [left_side, bottom_side, top_side, right_side]
end
35
+
36
# The four corners, ordered bottom-left, top-left, top-right, bottom-right.
#
# @return [Array<Point>]
def points
  bottom_left = leftbottom
  top_left = lefttop
  top_right = righttop
  bottom_right = rightbottom

  [bottom_left, top_left, top_right, bottom_right]
end
39
+
40
# Smallest +euclid_distance_to+ value over every pairing of one of this
# box's edges with one of +other+'s edges.
#
# @param other [#edges] object exposing its edges
# @return [Numeric, nil] the minimum, or nil when there are no pairings
def euclid_distance_to(other)
  pair_distances = edges.flat_map do |own_edge|
    other.edges.map { |their_edge| own_edge.euclid_distance_to(their_edge) }
  end

  pair_distances.min
end
51
+
52
# Smallest +distance_to(point)+ reported by any edge of the box.
#
# @param point [Point]
# @return [Numeric, nil] the minimum, or nil when there are no edges
def distance_to_point(point)
  edges.map { |edge| edge.distance_to(point) }.min
end
61
+
62
# Distance from +point+ to the centre of the box (not to its border).
#
# @param point [#distance_to]
# @return [Numeric]
def euclid_distance_to_point(point)
  middle = center
  point.distance_to(middle)
end
65
+
66
# Area of the box: length of the left side times length of the top side.
#
# @return [Numeric]
def area
  left_side = Segment.new(leftbottom, lefttop)
  top_side = Segment.new(lefttop, righttop)

  left_side.length * top_side.length
end
70
+
71
# Text rendering of the box: "POLYGON(" followed by the corners from
# +points+ as "(x, y)" pairs joined by commas, then ")".
#
# @return [String]
def to_gis
  rendered = points.map do |corner|
    "(#{corner.x}, #{corner.y})"
  end

  "POLYGON(#{rendered.join(',')})"
end
75
+
76
# Whether +point+ lies inside the box; points on the border count as inside.
#
# @param point [Point]
# @return [Boolean]
def contains_point?(point)
  return false unless point.x <= righttop.x && point.x >= leftbottom.x

  point.y <= righttop.y && point.y >= leftbottom.y
end
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,123 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "geometry"
4
+
5
+ module ChemScanner
6
+ # Monkey patch Line class from ruby-geometry
7
+ module Extension
8
+ include Geometry
9
+
10
+ # Monkey patch Line class
11
+ refine Geometry::Line do
12
# Angle of the line in degrees.
#
# Returns 90 for a vertical line and 0 for a horizontal one. Otherwise the
# arctangent of a coordinate-difference ratio is taken; a non-positive
# result is shifted up by a full turn before converting to degrees.
#
# NOTE(review): the two branches use reciprocal ratios (dy/dx vs dx/dy);
# confirm this is what the inverted-axis handling intends.
#
# @return [Numeric]
def angle
  return 90 if vertical?
  return 0 if horizontal?

  left, right = [point1, point2].sort_by(&:x)
  diff_x = left.x - right.x
  diff_y = left.y - right.y

  # inverted axis/origin
  ratio = point1.y > point2.y ? diff_y / diff_x : diff_x / diff_y
  radians = Math.atan(ratio)
  radians += 2 * Math::PI unless radians.positive?

  radians * 360 / (2 * Math::PI)
end
27
+
28
# Segment spanning the two points that define this line.
#
# @return [Segment]
def to_segment
  from = point1
  to = point2
  Segment.new(from, to)
end
31
+
32
# Coefficients [a, b, c] of the line written as a*x + b*y = c, derived
# from its two defining points.
#
# @return [Array<Numeric>]
def abc_coeff
  start = point1
  finish = point2

  coeff_a = finish.y - start.y
  coeff_b = start.x - finish.x

  [coeff_a, coeff_b, coeff_a * start.x + coeff_b * start.y]
end
39
+
40
+ # Get the point on the line for a given x (or for a given y when is_y is true)
41
# Builds the point on this line that has the given coordinate.
#
# @param value [Numeric] the known coordinate
# @param is_y [Boolean] when true +value+ is a y coordinate, otherwise x
# @return [Point] the other coordinate comes from +x_from_y+ / +y_from_x+
def get_point(value, is_y = false)
  # Return early: previously the point built for the y case was discarded
  # and the method fell through to treating +value+ as an x coordinate.
  return Point.new(x_from_y(value), Float(value)) if is_y

  Point.new(Float(value), y_from_x(value))
end
50
+
51
# x coordinate of the point on this line with the given y.
#
# @param point_y [Numeric]
# @return [Float, nil] nil when both defining points share the same y
def x_from_y(point_y)
  b = point1.y - point2.y
  return nil if b.zero?

  # Divide by a Float so Integer coordinates are not truncated before the
  # final Float conversion.
  Float(point1.x - ((point1.y - point_y) * (point1.x - point2.x) / b.to_f))
end
57
+
58
# y coordinate of the point on this line with the given x.
#
# @param point_x [Numeric]
# @return [Float, nil] nil when both defining points share the same x
def y_from_x(point_x)
  b = point1.x - point2.x
  return nil if b.zero?

  # Divide by a Float so Integer coordinates are not truncated before the
  # final Float conversion.
  Float(point1.y - ((point1.x - point_x) * (point1.y - point2.y)) / b.to_f)
end
64
+
65
# Whether this line crosses +segment+; the check is delegated to the
# segment's +intersects_with_line?+.
#
# @param segment [#intersects_with_line?]
# @return [Boolean]
def intersects_with_segment?(segment)
  this_line = self
  segment.intersects_with_line?(this_line)
end
68
+
69
# Whether this line crosses at least one edge of +polygon+.
#
# @param polygon [#edges]
# @return [Boolean]
def intersects_with_polygon?(polygon)
  polygon.edges.any? { |side| intersects_with_segment?(side) }
end
76
+
77
# Intersection points between this line and +polygon+, as computed by the
# polygon's +intersection_points_with_line+.
#
# @param polygon [#intersection_points_with_line]
# @return [Object] whatever the polygon returns
def intersection_points_with_polygon(polygon)
  this_line = self
  polygon.intersection_points_with_line(this_line)
end
80
+
81
# Intersection point of this line with +line+.
#
# Both lines are written as a*x + b*y = c (see +abc_coeff+) and the 2x2
# system is solved by Cramer's rule.
#
# @param line [#abc_coeff] the other line
# @return [Point, nil] nil when +parallel_to?+ reports the lines parallel
def intersection_points_with(line)
  return nil if parallel_to?(line)

  # Ax + By = C
  a1, b1, c1 = abc_coeff
  a2, b2, c2 = line.abc_coeff

  # Float determinant: with Integer coefficients the divisions below would
  # otherwise truncate the intersection coordinates.
  determinant = (a1 * b2 - a2 * b1).to_f

  x = (b2 * c1 - b1 * c2) / determinant
  y = (a1 * c2 - a2 * c1) / determinant

  Point.new(x, y)
end
95
+
96
+ # positive: same side with point2
97
+ # negative: same side with point1
98
# Cross product of this line's direction vector (point1 -> point2) with
# the vector from point1 to +point+; its sign tells the two sides of the
# line apart.
#
# @param point [Point]
# @return [Numeric]
def point_side(point)
  toward_point = Segment.new(point1, point).to_vector
  direction = to_segment.to_vector

  direction.cross_product(toward_point)
end
102
+
103
# Line through +point+ that is perpendicular to this line.
#
# The second defining point is taken 5 units away from +point+: along x
# for vertical and sloped lines, along y for horizontal lines.
#
# @param point [Point]
# @return [Line]
def perpen_line_via_point(point)
  return Line.new(point, Point.new(point.x + 5, point.y)) if vertical?
  return Line.new(point, Point.new(point.x, point.y + 5)) if horizontal?

  perp_slope = -1 / slope
  other_x = point.x + 5
  other_y = perp_slope * other_x + (point.y - perp_slope * point.x)

  Line.new(point, Point.new(other_x, other_y))
end
116
+
117
# Projection of +point+ onto this line: where the perpendicular through
# +point+ meets the line.
#
# @param point [Point]
# @return [Object] result of +intersection_points_with+ on the perpendicular
def point_projection(point)
  perpendicular = perpen_line_via_point(point)
  perpendicular.intersection_points_with(self)
end
121
+ end
122
+ end
123
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "geometry"
4
+
5
+ module ChemScanner
6
+ # Monkey patch Point class from ruby-geometry
7
+ module Extension
8
+ refine Geometry::Point do
9
# Distance from this point to +polygon+, as computed by the polygon's
# +euclid_distance_to_point+.
#
# @param polygon [#euclid_distance_to_point]
# @return [Object] whatever the polygon returns
def euclid_distance_to_polygon(polygon)
  this_point = self
  polygon.euclid_distance_to_point(this_point)
end
12
+
13
# Distance between this point and +other+, via +Geometry.distance+.
#
# @param other [Point]
# @return [Object] whatever +Geometry.distance+ returns
def distance_to(other)
  this_point = self
  Geometry.distance(this_point, other)
end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "geometry"
4
+
5
+ module ChemScanner
6
+ include Geometry
7
+
8
+ # Extension module
9
+ module Extension
10
+ # Monkey patch Polygon class from ruby-geometry
11
+ refine Geometry::Polygon do
12
# Centre of the polygon's bounding box.
#
# Divides by 2.0 so the midpoint stays exact when the corner coordinates
# are Integers (Integer#/ would truncate).
#
# @return [Point]
def center
  box = bounding_box
  lb = box.leftbottom
  rt = box.righttop

  Point.new((lb.x + rt.x) / 2.0, (lb.y + rt.y) / 2.0)
end
18
+
19
# Height of the bounding box: distance between its bottom-left and
# top-left corners.
#
# @return [Numeric]
def height
  box = bounding_box
  box.leftbottom.distance_to(box.lefttop)
end
23
+
24
# Width of the bounding box: distance between its bottom-left and
# bottom-right corners.
#
# @return [Numeric]
def width
  box = bounding_box
  box.leftbottom.distance_to(box.rightbottom)
end
28
+
29
# Whether any edge of this polygon intersects any edge of +other+.
#
# @param other [#edges]
# @return [Boolean]
def intersects_with_polygon?(other)
  edges.any? do |own_edge|
    other.edges.any? { |their_edge| own_edge.intersects_with?(their_edge) }
  end
end
38
+
39
# Whether every vertex of +other+ satisfies +contains?+ on this polygon.
# A polygon without vertices counts as contained.
#
# @param other [#vertices]
# @return [Boolean]
def contains_polygon?(other)
  other.vertices.all? { |vertex| contains?(vertex) }
end
46
+
47
# Whether the two polygons overlap in a loose sense: one contains the
# other, or one contains the other's centre.
#
# @param other [#contains_polygon?, #contains?, #center]
# @return [Boolean]
def around_polygon?(other)
  nested = contains_polygon?(other) || other.contains_polygon?(self)

  nested || contains?(other.center) || other.contains?(center)
end
53
+
54
# Rectangle covering the bounding boxes of both this polygon and +another+.
#
# Vertices are ordered bottom-left, top-left, top-right, bottom-right.
#
# @param another [#bounding_box]
# @return [Polygon]
def merge_polygon(another)
  own_box = bounding_box
  their_box = another.bounding_box

  low_corners = [own_box.leftbottom, their_box.leftbottom]
  high_corners = [own_box.righttop, their_box.righttop]

  left = low_corners.map(&:x).min
  bottom = low_corners.map(&:y).min
  right = high_corners.map(&:x).max
  top = high_corners.map(&:y).max

  corners = [[left, bottom], [left, top], [right, top], [right, bottom]]
  Polygon.new(corners.map { |x, y| Point.new(x, y) })
end
73
+
74
# Smallest +distance_to(point)+ reported by any edge of the polygon.
#
# Starts from Float::INFINITY instead of the former 9_999_999 sentinel,
# which silently capped the result for points farther away than that.
#
# @param point [Point]
# @return [Numeric] Float::INFINITY when the polygon has no edges
def distance_to_point(point)
  edges.reduce(Float::INFINITY) do |closest, edge|
    dist = edge.distance_to(point)
    dist < closest ? dist : closest
  end
end
84
+
85
# Smallest +euclid_distance_to_point(point)+ reported by any edge of the
# polygon.
#
# Starts from Float::INFINITY instead of the former 9_999_999 sentinel,
# which silently capped the result for points farther away than that.
#
# @param point [Point]
# @return [Numeric] Float::INFINITY when the polygon has no edges
def euclid_distance_to_point(point)
  edges.reduce(Float::INFINITY) do |closest, edge|
    dist = edge.euclid_distance_to_point(point)
    dist < closest ? dist : closest
  end
end
95
+
96
# Points where +line+ crosses the edges of this polygon.
#
# For each edge, the x of the crossing comes from +intersect_x+ on the
# edge's line; y is read from +line+, falling back to the edge's line when
# +line+ cannot supply it. A candidate is kept only if the edge reports
# containing it.
#
# @param line [#y_from_x] line to intersect with
# @return [Array<Point>] possibly empty
def intersection_points_with_line(line)
  edges.each_with_object([]) do |edge, hits|
    edge_line = edge.to_line
    x = edge_line.intersect_x(line)
    next if x.nil?

    y = line.y_from_x(x)
    y = edge_line.y_from_x(x) if y.nil?
    # Neither line can give a y for this x: skip the edge rather than
    # build a Point with a nil coordinate.
    next if y.nil?

    candidate = Point.new(x, y)
    hits.push(candidate) if edge.contains_point?(candidate)
  end
end
113
+ end
114
+ end
115
+ end
@@ -0,0 +1,196 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "geometry"
4
+
5
+ module ChemScanner
6
+ include Geometry
7
+
8
+ # Monkey patch ruby-geometry class
9
+ module Extension
10
+ # Monkey patch Segment class
11
+ refine Geometry::Segment do
12
# Both endpoints of the segment, point1 first.
#
# @return [Array<Point>]
def points
  first_end = point1
  second_end = point2
  [first_end, second_end]
end
15
+
16
# Whether +point+ lies on the segment.
#
# The point counts as on the segment when its distances to both endpoints
# add up to the segment length, compared after rounding to two decimals.
#
# @param point [Point]
# @return [Boolean]
def contains_point?(point)
  via_point = Geometry.distance(point1, point) + Geometry.distance(point, point2)

  length.round(2) == via_point.round(2)
end
22
+
23
# Whether both endpoints of +other+ lie on this segment.
#
# @param other [Segment]
# @return [Boolean]
def contains_segment?(other)
  return false unless contains_point?(other.point1)

  contains_point?(other.point2)
end
26
+
27
# Midpoint of the segment.
#
# Divides by 2.0 so the midpoint stays exact when the endpoint coordinates
# are Integers (Integer#/ would truncate).
#
# @return [Point]
def center
  Point.new((point1.x + point2.x) / 2.0, (point1.y + point2.y) / 2.0)
end
30
+
31
# Line through the segment's two endpoints.
#
# @return [Line]
def to_line
  from = point1
  to = point2
  Line.new(from, to)
end
34
+
35
# Whether more than one edge of +polygon+ intersects this segment; a
# single intersecting edge does not count.
#
# @param polygon [#edges]
# @return [Boolean]
def intersects_with_polygon?(polygon)
  crossings = polygon.edges.count { |side| side.intersects_with?(self) }
  crossings > 1
end
43
+
44
# Whether +line+ crosses this segment.
#
# The crossing of +line+ with the segment's own line is located first (x
# via +intersect_x+, y from +line+ or, failing that, from the segment's
# line); the segment intersects when it contains that point.
#
# @param line [#y_from_x]
# @return [Boolean]
def intersects_with_line?(line)
  own_line = to_line
  cross_x = own_line.intersect_x(line)
  return false if cross_x.nil?

  cross_y = line.y_from_x(cross_x)
  cross_y = own_line.y_from_x(cross_x) if cross_y.nil?
  return false if cross_y.nil?

  contains_point?(Point.new(cross_x, cross_y))
end
56
+
57
# Two points at +distance+ from point2, one on each side of the segment,
# along the direction perpendicular to it.
#
# @param distance [Numeric]
# @return [Array<Point>] two points
def head_perpen_points_dist(distance)
  dx = point2.x - point1.x
  dy = point2.y - point1.y
  norm = Math.sqrt((dx * dx) + (dy * dy))

  # Unit direction of the segment (point1 -> point2).
  unit_x = dx / norm
  unit_y = dy / norm

  shift_x = distance * unit_y
  shift_y = distance * unit_x

  [
    Point.new(point2.x + shift_x, point2.y - shift_y),
    Point.new(point2.x - shift_x, point2.y + shift_y),
  ]
end
69
+
70
# Two points at +distance+ from point1, one on each side of the segment,
# along the direction perpendicular to it.
#
# @param distance [Numeric]
# @return [Array<Point>] two points
def tail_perpen_points_dist(distance)
  dx = point1.x - point2.x
  dy = point1.y - point2.y
  norm = Math.sqrt((dx * dx) + (dy * dy))

  # Unit direction of the reversed segment (point2 -> point1).
  unit_x = dx / norm
  unit_y = dy / norm

  shift_x = distance * unit_y
  shift_y = distance * unit_x

  [
    Point.new(point1.x - shift_x, point1.y + shift_y),
    Point.new(point1.x + shift_x, point1.y - shift_y),
  ]
end
82
+
83
# +point+ shifted by the segment's own displacement (point2 - point1);
# joining +point+ to the result gives a segment parallel to this one.
#
# @param point [Point]
# @return [Point]
def parallel_at(point)
  shifted_x = point.x + point2.x - point1.x
  shifted_y = point.y + point2.y - point1.y

  Point.new(shifted_x, shifted_y)
end
88
+
89
# Smallest of the four endpoint-to-endpoint distances between this
# segment and +other+.
#
# @param other [Segment]
# @return [Numeric]
def euclid_distance_to(other)
  endpoint_gaps = [other.point1, other.point2].flat_map do |target|
    [point1.distance_to(target), point2.distance_to(target)]
  end

  endpoint_gaps.min
end
97
+
98
# Distance from +point+ to the nearer of the two endpoints.
#
# @param point [Point]
# @return [Numeric]
def euclid_distance_to_point(point)
  [point1, point2].map { |endpoint| endpoint.distance_to(point) }.min
end
104
+
105
# Smallest +euclid_distance_to+ between this segment and any edge of the
# bounding box of +poly+.
#
# @param poly [#bounding_box]
# @return [Numeric, nil] nil when the bounding box reports no edges
def euclid_distance_to_polygon(poly)
  box_edges = poly.bounding_box.edges
  box_edges.map { |side| euclid_distance_to(side) }.min
end
115
+
116
# Smallest +distance_to_segment+ between this segment and any edge of
# +bbox+.
#
# @param bbox [#edges]
# @return [Numeric, nil] nil when +bbox+ reports no edges
def distance_to_boundingbox(bbox)
  bbox.edges.map { |side| distance_to_segment(side) }.min
end
126
+
127
# Smallest of four distances: from each of this segment's endpoints to
# +other+, and from each of +other+'s endpoints to this segment.
#
# @param other [Segment]
# @return [Numeric]
def distance_to_segment(other)
  from_own_ends = [point1, point2].map { |endpoint| other.distance_to(endpoint) }
  from_their_ends = [other.point1, other.point2].map { |endpoint| distance_to(endpoint) }

  (from_own_ends + from_their_ends).min
end
135
+
136
# Segment from +point+ to the foot of the perpendicular dropped onto the
# line through this segment.
#
# @param point [Point]
# @return [Segment, nil] nil when no intersection point is found
def perpen_segment_via_point(point)
  own_line = to_line
  perpendicular = own_line.perpen_line_via_point(point)

  foot = perpendicular.intersection_points_with(own_line)
  foot.nil? ? nil : Segment.new(point, foot)
end
145
+
146
# Whether +point+ lies on the segment within a relative distance +range+
# (a fraction of the segment length) of a reference endpoint.
#
# NOTE(review): +true+ measures from point1 although the flag is named
# from_head; confirm against callers.
#
# @param point [Point]
# @param range [Numeric] upper bound (exclusive) for distance / length
# @param from_head [true, false, nil] true measures from point1, false
#   from point2, nil uses the larger of the two distances; any other
#   value yields false
# @return [Boolean]
def point_in_range(point, range, from_head = nil)
  return false unless contains_point?(point)

  from_first = point1.distance_to(point)
  from_second = point2.distance_to(point)
  return false unless [true, false, nil].include?(from_head)

  measured =
    if from_head.nil?
      [from_first, from_second].max
    else
      from_head ? from_first : from_second
    end

  (measured / length) < range
end
161
+
162
# Whether at least one vertex of +polygon+ projects onto this segment,
# i.e. its projection on the segment's line lies on the segment itself.
#
# @param polygon [#vertices]
# @return [Boolean]
def polygon_in_range(polygon)
  own_line = to_line

  polygon.vertices.any? do |vertex|
    contains_point?(own_line.point_projection(vertex))
  end
end
172
+
173
# Returns +num+ points evenly spaced strictly between the two endpoints.
#
# The walk starts at the endpoint with the smaller x (smaller y on ties)
# and moves toward the other endpoint with signed steps, so the points
# stay on the segment for any slope. The previous version started at
# (min x, min y) and added absolute deltas, which left the segment
# whenever its slope was negative, and it relied on OpenStruct without
# requiring it.
#
# @param num [Integer] number of points wanted
# @return [Array<Point>] empty when +num+ is less than 2
def slice_to_many_points(num)
  return [] if num < 2

  start, finish = [point1, point2].sort_by { |pt| [pt.x, pt.y] }
  # fdiv keeps the step fractional even for Integer coordinates.
  step_x = (finish.x - start.x).fdiv(num + 1)
  step_y = (finish.y - start.y).fdiv(num + 1)

  x = start.x
  y = start.y
  Array.new(num) do
    x += step_x
    y += step_y
    Point.new(x, y)
  end
end
190
+
191
# Text rendering of the segment: "SEGMENT(" followed by both endpoints as
# "(x, y)" pairs separated by ", ", then ")".
#
# @return [String]
def to_gis
  head, tail = [point1, point2].map { |pt| "(#{pt.x}, #{pt.y})" }

  "SEGMENT(#{head}, #{tail})"
end
194
+ end
195
+ end
196
+ end