chem_scanner 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +13 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yml +604 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +5 -0
  8. data/CODE_OF_CONDUCT.md +74 -0
  9. data/Gemfile +20 -0
  10. data/LICENSE.txt +661 -0
  11. data/README.md +177 -0
  12. data/Rakefile +8 -0
  13. data/bin/console +14 -0
  14. data/bin/setup +8 -0
  15. data/chem_scanner.gemspec +43 -0
  16. data/lib/chem_scanner.rb +79 -0
  17. data/lib/chem_scanner/cdx.rb +67 -0
  18. data/lib/chem_scanner/cdxml.rb +72 -0
  19. data/lib/chem_scanner/chem_draw/cdx_reader.rb +101 -0
  20. data/lib/chem_scanner/chem_draw/node/base_node.rb +123 -0
  21. data/lib/chem_scanner/chem_draw/node/base_value.rb +257 -0
  22. data/lib/chem_scanner/chem_draw/node/bond.rb +100 -0
  23. data/lib/chem_scanner/chem_draw/node/bracket_attachment.rb +17 -0
  24. data/lib/chem_scanner/chem_draw/node/bracket_group.rb +32 -0
  25. data/lib/chem_scanner/chem_draw/node/chem_geometry.rb +58 -0
  26. data/lib/chem_scanner/chem_draw/node/color_table.rb +46 -0
  27. data/lib/chem_scanner/chem_draw/node/font_table.rb +54 -0
  28. data/lib/chem_scanner/chem_draw/node/fragment.rb +149 -0
  29. data/lib/chem_scanner/chem_draw/node/fragment_node.rb +145 -0
  30. data/lib/chem_scanner/chem_draw/node/graphic.rb +94 -0
  31. data/lib/chem_scanner/chem_draw/node/text.rb +242 -0
  32. data/lib/chem_scanner/chem_draw/parser.rb +214 -0
  33. data/lib/chem_scanner/chem_draw/yaml/cdx_objects.yaml +32 -0
  34. data/lib/chem_scanner/chem_draw/yaml/cdx_props.yaml +263 -0
  35. data/lib/chem_scanner/chem_draw/yaml/cdxml_objects.yaml +36 -0
  36. data/lib/chem_scanner/chem_draw/yaml/cdxml_props.yaml +263 -0
  37. data/lib/chem_scanner/chem_draw/yaml/props_data_type.yaml +263 -0
  38. data/lib/chem_scanner/configuration/abbreviation.rb +76 -0
  39. data/lib/chem_scanner/configuration/superatom.rb +76 -0
  40. data/lib/chem_scanner/configuration/superatom.txt +2874 -0
  41. data/lib/chem_scanner/configuration/util.rb +40 -0
  42. data/lib/chem_scanner/configuration/yaml/abbreviations.yaml +6399 -0
  43. data/lib/chem_scanner/configuration/yaml/elements.yaml +115 -0
  44. data/lib/chem_scanner/configuration/yaml/solvents.yaml +16 -0
  45. data/lib/chem_scanner/doc.rb +56 -0
  46. data/lib/chem_scanner/docx.rb +86 -0
  47. data/lib/chem_scanner/export/cml.rb +176 -0
  48. data/lib/chem_scanner/extension/element_map.rb +9 -0
  49. data/lib/chem_scanner/extension/geometry/bounding_box.rb +84 -0
  50. data/lib/chem_scanner/extension/geometry/line.rb +123 -0
  51. data/lib/chem_scanner/extension/geometry/point.rb +18 -0
  52. data/lib/chem_scanner/extension/geometry/polygon.rb +115 -0
  53. data/lib/chem_scanner/extension/geometry/segment.rb +196 -0
  54. data/lib/chem_scanner/extension/passthrough.rb +7 -0
  55. data/lib/chem_scanner/interpreter/element/arrow.rb +298 -0
  56. data/lib/chem_scanner/interpreter/element/atom.rb +134 -0
  57. data/lib/chem_scanner/interpreter/element/fragment.rb +59 -0
  58. data/lib/chem_scanner/interpreter/element/molecule.rb +473 -0
  59. data/lib/chem_scanner/interpreter/element/molecule_group.rb +34 -0
  60. data/lib/chem_scanner/interpreter/element/reaction.rb +186 -0
  61. data/lib/chem_scanner/interpreter/element/reaction_step.rb +39 -0
  62. data/lib/chem_scanner/interpreter/formula_to_mol.rb +75 -0
  63. data/lib/chem_scanner/interpreter/post_process/assemble.rb +38 -0
  64. data/lib/chem_scanner/interpreter/post_process/label_by_molecule.rb +37 -0
  65. data/lib/chem_scanner/interpreter/post_process/reaction_info.rb +225 -0
  66. data/lib/chem_scanner/interpreter/post_process/reaction_step.rb +95 -0
  67. data/lib/chem_scanner/interpreter/post_process/reagent_label.rb +46 -0
  68. data/lib/chem_scanner/interpreter/post_process/text_as_molecule.rb +52 -0
  69. data/lib/chem_scanner/interpreter/post_process/text_label.rb +40 -0
  70. data/lib/chem_scanner/interpreter/pre_process/arrow.rb +197 -0
  71. data/lib/chem_scanner/interpreter/pre_process/graphic.rb +41 -0
  72. data/lib/chem_scanner/interpreter/pre_process/molecule.rb +150 -0
  73. data/lib/chem_scanner/interpreter/reaction_detection/assign_to_reaction.rb +129 -0
  74. data/lib/chem_scanner/interpreter/reaction_detection/duplicate_reagents.rb +50 -0
  75. data/lib/chem_scanner/interpreter/reaction_detection/molecule_group.rb +55 -0
  76. data/lib/chem_scanner/interpreter/reaction_detection/multi_line_chain_reaction.rb +85 -0
  77. data/lib/chem_scanner/interpreter/reaction_detection/remove_separated_mol.rb +115 -0
  78. data/lib/chem_scanner/interpreter/reaction_detection/text_assignment.rb +166 -0
  79. data/lib/chem_scanner/interpreter/scheme.rb +173 -0
  80. data/lib/chem_scanner/interpreter/scheme_base.rb +64 -0
  81. data/lib/chem_scanner/interpreter/text_group/bold_groups.rb +183 -0
  82. data/lib/chem_scanner/interpreter/text_group/molecule_text_group.rb +138 -0
  83. data/lib/chem_scanner/interpreter/text_group/reaction_text_groups.rb +221 -0
  84. data/lib/chem_scanner/interpreter/text_group/retrieve_alias_info.rb +41 -0
  85. data/lib/chem_scanner/interpreter/text_group/retrieve_n_atoms.rb +106 -0
  86. data/lib/chem_scanner/interpreter/text_group/text_group_interpreter.rb +92 -0
  87. data/lib/chem_scanner/perkin_eln.rb +287 -0
  88. data/lib/chem_scanner/version.rb +5 -0
  89. data/lib/rubygems_plugin.rb +5 -0
  90. metadata +244 -0
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChemScanner
4
+ module ChemDraw
5
# Lookup from the CDX bond-order flag value to the bond order used by the
# parser. Each key is a single bit (1 << position); the list below is written
# in bit order, so the entry at position 0 belongs to 0x0001, the entry at
# position 6 to 0x0040, and so on up to 0x4000.
CDX_BOND_ORDER = [
  1, 2, 3, 4, 5, 6,               # 0x0001 .. 0x0020
  0.5, 1.5, 2.5, 3.5, 4.5, 5.5,   # 0x0040 .. 0x0800
  "dative", "ionic", "hydrogen",  # 0x1000 .. 0x4000
].each_with_index.map { |order, bit| [1 << bit, order] }.to_h.freeze
22
+
23
# Lookup from the CDXML bond display name to its numeric code. The names are
# listed in code order, so each name maps to its position in the list
# ("Solid" => 0 ... "DashDot" => 14).
CDXML_BOND_DISPLAY = %w[
  Solid
  Dash
  Hash
  WedgedHashBegin
  WedgedHashEnd
  Bold
  WedgeBegin
  WedgeEnd
  Wavy
  HollowWedgeBegin
  HollowWedgeEnd
  WavyWedgeBegin
  WavyWedgeEnd
  Dot
  DashDot
].each_with_index.to_h.freeze
40
+
41
# Parser node for a ChemDraw bond (CDX and CDXML).
#
# Collects the ids of the two nodes the bond connects, its order, its
# display code (kept in +stereo+) and its foreground colour value.
class Bond < BaseNode
  attr_accessor :begin_id, :end_id, :stereo, :order, :color

  # Defaults: no endpoints yet, display code 0, order 1, colour 0.
  def initialize(parser, parser_type, id)
    super(parser, parser_type, id)

    @begin_id = nil
    @end_id = nil
    @stereo = 0
    @order = 1
    @color = 0
  end

  # Stores the value of a recognised bond property; every other tag is
  # passed to do_unhandled.
  def parse_node(tag, _id, data)
    property = @props_ref[tag]

    if property == "Bond_Begin"
      @begin_id = read_value(tag, data)
    elsif property == "Bond_End"
      @end_id = read_value(tag, data)
    elsif property == "Bond_Order"
      @order = bond_order(read_value(tag, data))
    elsif property == "Bond_Display"
      @stereo = bond_display(tag, data)
    elsif property == "ForegroundColor"
      @color = read_value(tag, data)
    else
      do_unhandled(tag)
    end
  end

  # CDXML values are returned untouched; CDX values are translated through
  # CDX_BOND_ORDER, with 0 for flags that are not in the table.
  def bond_order(val)
    @parser_type == "cdxml" ? val : CDX_BOND_ORDER.fetch(val, 0)
  end

  # CDX supplies the display code directly; for any other parser type the
  # display name in data.text is looked up in CDXML_BOND_DISPLAY (nil when
  # the name is unknown).
  def bond_display(tag, data)
    if @parser_type == "cdx"
      read_value(tag, data)
    else
      CDXML_BOND_DISPLAY[data.text]
    end
  end

  # Both endpoint ids as [begin, end].
  def end_points
    [@begin_id, @end_id]
  end

  # Points whichever end currently equals +endpoint+ at +new_point+. The
  # begin side is checked first and only one side is ever replaced.
  # Returns new_point, or nil when neither end matched.
  def replace_endpoint(endpoint, new_point)
    return @begin_id = new_point if @begin_id == endpoint

    @end_id = new_point if @end_id == endpoint
  end

  # The id at the opposite end from +endpoint+. Any id other than the
  # begin id yields the begin id.
  def other_endpoint(endpoint)
    return @end_id if endpoint == @begin_id

    @begin_id
  end

  # True when +id+ is one of the two endpoint ids.
  def has_endpoint?(id)
    end_points.include?(id)
  end
end
99
+ end
100
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChemScanner
4
+ module ChemDraw
5
# Parser node for a bracket attachment; it only records the id of the
# graphic the attachment refers to.
class BracketAttachment < BaseNode
  attr_reader :graphic_id

  # Resolves the tag through the property table first, then the object
  # table, and reads the value when it names "Bracket_GraphicID". All other
  # tags are ignored and yield nil.
  def parse_node(tag, _id, data)
    resolved = @props_ref[tag] || @obj_ref[tag]

    @graphic_id = read_value(tag, data) if resolved == "Bracket_GraphicID"
  end
end
16
+ end
17
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChemScanner
4
+ module ChemDraw
5
# Parser node for a bracketed group: gathers the ids of the bracketed
# objects together with the group's BracketAttachment children.
class BracketGroup < BaseNode
  require "chem_scanner/chem_draw/node/bracket_attachment"

  attr_reader :attachments, :object_ids

  # Starts with no attachments and no bracketed object ids.
  def initialize(parser, parser_type, id)
    super(parser, parser_type, id)

    @attachments = []
    @object_ids = []
  end

  # "BracketedObjects" properties replace the object id list (returns nil).
  # "BracketAttachment" objects are read and appended to the attachments.
  # Everything else goes to do_unhandled.
  def parse_node(tag, id, data)
    if @props_ref[tag] == "BracketedObjects"
      @object_ids = read_value(tag, data)
      nil
    elsif @obj_ref[tag] == "BracketAttachment"
      child = BracketAttachment.new(@parser, @parser_type, id)
      child.read
      @attachments << child
    else
      do_unhandled(tag)
    end
  end
end
31
+ end
32
+ end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChemScanner
4
+ module ChemDraw
5
# Parser node for a geometry object that carries a tail point, a head point,
# an arrow head type, a "no go" marker and a line type.
class ChemGeometry < BaseNode
  attr_reader :tail, :head, :nogo, :line_type

  def initialize(parser, parser_type, id)
    super(parser, parser_type, id)

    # NOTE(review): @middle_points is initialised but never read or written
    # anywhere else in this class.
    @middle_points = []
    @line_type = 0
  end

  # Stores the recognised geometry properties; every other tag is passed to
  # do_unhandled.
  #
  # NOTE: head ----> tail (head at tail)
  def parse_node(tag, _nid, data)
    case @props_ref[tag]
    when "3DTail"
      x, y = read_value(tag, data)
      @tail = { x: x, y: y }
    when "3DHead"
      x, y = read_value(tag, data)
      @head = { x: x, y: y }
    when "Arrow_ArrowHead_Head"
      @arrow_head = read_type(tag, data, CDXML_ARROW_TYPE)
    when "Arrow_NoGo" then @nogo = read_value(tag, data)
    when "Line_Type"
      @line_type = read_type(tag, data, CDXML_LINE_TYPE)
    else do_unhandled(tag)
    end
  end

  # Arrow head type parsed from "Arrow_ArrowHead_Head", or nil when the
  # object had no such property. The value is kept in @arrow_head, so a
  # plain attr_reader for :head_type would always have returned nil.
  def head_type
    @arrow_head
  end

  # Segment built from the tail and head coordinates.
  # Raises NoMethodError when either point has not been parsed yet.
  def segment
    Geometry::Segment.new_by_arrays(
      [@tail[:x], @tail[:y]],
      [@head[:x], @head[:y]],
    )
  end

  # The tail/head segment converted with Segment#to_vector.
  def vector
    segment.to_vector
  end

  # The tail/head segment converted with Segment#to_line.
  def line
    segment.to_line
  end

  # True unless the parsed arrow head type equals 2. Also true when no arrow
  # head property was parsed at all, because nil != 2.
  # NOTE(review): presumably 2 is the "full" head entry of CDXML_ARROW_TYPE;
  # confirm against that table.
  def headless
    @arrow_head != 2
  end

  # True when an "Arrow_NoGo" property was read with a non-nil value.
  def cross?
    !@nogo.nil?
  end
end
57
+ end
58
+ end
@@ -0,0 +1,46 @@
1
+ module ChemScanner
2
+ module ChemDraw
3
# Colour table of a ChemDraw document.
#
# After #read, +table+ is an array of six-digit hex colour strings. The two
# fixed entries "000000" and "FFFFFF" always come first, followed by the
# colours found in the document, in document order.
class ColorTable < BaseNode
  attr_reader :table

  # parser_type - "cdx" selects the binary reader; any other value selects
  #               the CDXML reader.
  # data        - raw table bytes (CDX) or the colour table XML node (CDXML).
  #
  # Does not call BaseNode#initialize; only the two values above are stored.
  def initialize(parser_type, data)
    @parser_type = parser_type
    @data = data
  end

  # Parses the table with the reader matching the parser type and returns
  # the resulting array.
  def read
    @parser_type == "cdx" ? read_cdx : read_cdxml
  end

  # Binary layout as read here: a 2-byte count followed by 2-byte channel
  # values, three per colour. Only the high byte of each channel is kept.
  def read_cdx
    @nums = read_int(@data[0, 2], true)
    channels = binary_chunks(@data[2..-1], 2).map { |chunk| read_int(chunk, true) }

    parsed = channels.each_slice(3).map do |triple|
      # +"" gives an unfrozen buffer even if frozen string literals are
      # enabled for this file later on.
      triple.each_with_object(+"") do |channel, hex|
        hex << (channel >> 8).to_s(16).rjust(2, "0")
      end
    end

    @table = %w[000000 FFFFFF] + parsed
  end

  # Reads every <color> child element; its r, g and b attributes are
  # converted to one hex byte each. Other child elements are skipped.
  def read_cdxml
    parsed = @data.element_children.each_with_object([]) do |color, colors|
      next if color.name != "color"

      colors.push(%w[r g b].map { |c| cdxml_channel_hex(color.attr(c)) }.join)
    end

    @table = %w[000000 FFFFFF] + parsed
  end

  private

  # Converts one CDXML channel attribute to a two-digit hex byte.
  #
  # CDXML writes channels as fractions between 0 and 1, so the value has to
  # be parsed as a float: truncating with to_i first turned every partial
  # intensity such as "0.5" into 0. The scaled value is rounded and clamped
  # to 0..255 so one channel can never produce more than two digits.
  # A missing attribute (nil) counts as 0.
  def cdxml_channel_hex(fraction)
    level = (fraction.to_f * 255).round
    level = [[level, 0].max, 255].min
    level.to_s(16).rjust(2, "0")
  end
end
45
+ end
46
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChemScanner
4
+ module ChemDraw
5
# Font table of a ChemDraw document. After #read, +table+ is an array of
# hashes with the keys :id, :charset and :name, one per font.
class FontTable < BaseNode
  attr_reader :table

  # parser_type - "cdx" selects the binary reader; any other value selects
  #               the CDXML reader.
  # data        - raw table bytes (CDX) or the font table XML node (CDXML).
  def initialize(parser_type, data)
    @parser_type = parser_type
    @data = data
  end

  # Parses the table with the reader matching the parser type.
  def read
    @parser_type == "cdx" ? read_cdx : read_cdxml
  end

  # Binary layout as read here: the font count sits at byte offset 2 and the
  # first record starts at offset 4. Each record has a 6-byte header
  # followed by a name whose length is given in that header.
  def read_cdx
    @table = []
    font_count = read_int(@data[2, 2], true)

    offset = 4
    font_count.times do
      font, name_length = read_cdx_font_attribute(offset)
      @table << font
      offset += 6 + name_length
    end

    @table
  end

  # Decodes the record starting at byte offset +iter+: three 2-byte integers
  # (id, charset, name length) followed by the name bytes.
  # Returns [font_hash, name_length].
  def read_cdx_font_attribute(iter)
    id, charset, length = [0, 2, 4].map do |shift|
      read_int(@data[iter + shift, 2], true)
    end
    name = @data[iter + 6, length]

    [{ id: id, charset: charset, name: name }, length]
  end

  # Builds one entry per <font> child element; the id is converted to an
  # integer while charset and name are kept as the raw attribute values.
  def read_cdxml
    fonts = @data.element_children.select { |node| node.name == "font" }

    @table = fonts.map do |font|
      {
        id: font.attr("id").to_i,
        charset: font.attr("charset"),
        name: font.attr("name"),
      }
    end
  end
end
53
+ end
54
+ end
@@ -0,0 +1,149 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChemScanner
4
+ module ChemDraw
5
# Parser node for a ChemDraw fragment: the nodes, bonds and graphics nested
# inside it, plus a rectangular polygon enclosing the nodes.
class Fragment < BaseNode
  require "chem_scanner/chem_draw/node/fragment_node"
  require "chem_scanner/chem_draw/node/bond"

  attr_accessor :boxed, # indicate if fragment is boxed within an rectangle
                :polygon, :node_map, :bond_map, :graphic_map

  # Starts unboxed with empty node, bond and graphic maps (all keyed by id).
  def initialize(parser, parser_type, id)
    super(parser, parser_type, id)
    @boxed = false

    @node_map = {}
    @bond_map = {}

    @graphic_map = {}
  end

  # Dispatches one child tag.
  #
  # Property tags are never interpreted here and go straight to
  # do_unhandled. Handling of "BoundingBox" (-> @polygon) and
  # "RepresentsProperty" existed only as commented-out code and stays
  # disabled. Object tags create the matching child under its own id.
  #
  # tag - tag to resolve through @props_ref / @obj_ref.
  # nid - id of the child object being parsed.
  def parse_node(tag, nid, _data)
    return do_unhandled(tag) if @props_ref.key?(tag)

    case @obj_ref[tag]
    when "Node" then create_node(nid)
    when "Bond" then create_bond(nid)
    when "Graphic" then create_graphic(nid)
    else do_unhandled(tag)
    end
  end

  # Builds the bounding polygon once parsing is done, unless a polygon is
  # already set or the fragment has no nodes.
  def post_parse_node
    return unless @polygon.nil?
    return if @node_map.empty?

    rebuild_polygon
  end

  # Recomputes @polygon as the rectangle spanning the leftbottom/righttop
  # corners of every node that has coordinates.
  def rebuild_polygon
    nodes = @node_map.values

    # Seed the box from the first node that has coordinates. Seeding from a
    # coordinate-less first node made the comparisons below run against nil
    # and raise. When no node has coordinates, fall back to the first node,
    # which yields the same nil-cornered polygon as before.
    # NOTE(review): assumes has_nil_coord? is true when x or y is nil;
    # verify in FragmentNode.
    seed = nodes.find { |node| !node.has_nil_coord? } || nodes.first
    lb = Geometry::Point.new(seed.x, seed.y)
    rt = Geometry::Point.new(seed.x, seed.y)

    nodes.each do |node|
      next if node.has_nil_coord?

      nlb = node.leftbottom
      nrt = node.righttop

      lb.x = nlb.x if nlb.x < lb.x
      lb.y = nlb.y if nlb.y < lb.y

      rt.x = nrt.x if nrt.x > rt.x
      rt.y = nrt.y if nrt.y > rt.y
    end

    points = [
      Geometry::Point.new(lb.x, lb.y),
      Geometry::Point.new(lb.x, rt.y),
      Geometry::Point.new(rt.x, rt.y),
      Geometry::Point.new(rt.x, lb.y),
    ]
    @polygon = Geometry::Polygon.new(points)
  end

  # Reads the child node with the given id and stores it in node_map.
  def create_node(id)
    node = FragmentNode.new(@parser, @parser_type, id)
    node.read
    @node_map[id] = node
  end

  # Reads the child bond with the given id and stores it in bond_map.
  def create_bond(id)
    bond = Bond.new(@parser, @parser_type, id)
    bond.read
    @bond_map[id] = bond
  end

  # Reads the child graphic with the given id and stores it in graphic_map.
  # The child's own id is used: building it with the fragment's id made
  # every graphic share one key and overwrite the previous one.
  def create_graphic(id)
    graphic = Graphic.new(@parser, @parser_type, id)
    graphic.read
    @graphic_map[id] = graphic
  end

  # Sub-hash of node_map holding the nodes whose type equals +type+.
  def get_node_with_type(type)
    @node_map.select { |_, v| v.type == type }
  end

  # Check if fragment has ExternalConnectionPoint node
  # (node type 12). Returns the matching id => node hash.
  def get_external_point
    get_node_with_type(12)
  end

  # Get the internal id for Fragment node
  #
  # Returns [] when the fragment has no external connection point.
  # Otherwise returns [ext_ids, internal_ids], where internal_ids[i] is the
  # node at the other end of the first bond touching ext_ids[i].
  # NOTE(review): an external point without any bond raises NoMethodError
  # here; confirm whether such fragments can occur.
  def get_internal_nids
    ext_ids = get_external_point.keys
    return [] if ext_ids.empty?

    internal_ids = ext_ids.map do |ext_id|
      _, bond = bond_has_endpoint(ext_id)
      bond.other_endpoint(ext_id)
    end
    [ext_ids, internal_ids]
  end

  # First [id, bond] pair of bond_map whose bond touches +endpoint+, or nil.
  def bond_has_endpoint(endpoint)
    @bond_map.detect { |_, b| b.end_points.include?(endpoint) }
  end

  # New fragment with the same parser, parser type, id and boxed flag, and
  # with fresh node/bond hashes that point at the same node and bond
  # objects. The polygon and the graphic map are not carried over.
  def clone
    cloned = self.class.new(@parser, @parser_type, @id)
    cloned.boxed = @boxed
    cloned.clone_node_map(@node_map)
    cloned.clone_bond_map(@bond_map)

    cloned
  end

  # Replaces node_map with a new hash holding the entries of +node_map+.
  def clone_node_map(node_map)
    @node_map = {}
    node_map.each { |nid, node| @node_map[nid] = node }
  end

  # Replaces bond_map with a new hash holding the entries of +bond_map+.
  def clone_bond_map(bond_map)
    @bond_map = {}
    bond_map.each { |bid, bond| @bond_map[bid] = bond }
  end

  # Takes a temporary id from the parser, assigns it to this fragment and
  # returns it.
  def set_new_id
    new_id = @parser.get_tempid
    set_id(new_id)
    new_id
  end

  # Overwrites the fragment id.
  def set_id(new_id)
    @id = new_id
  end
end
148
+ end
149
+ end