chem_scanner 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +13 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yml +604 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +5 -0
  8. data/CODE_OF_CONDUCT.md +74 -0
  9. data/Gemfile +20 -0
  10. data/LICENSE.txt +661 -0
  11. data/README.md +177 -0
  12. data/Rakefile +8 -0
  13. data/bin/console +14 -0
  14. data/bin/setup +8 -0
  15. data/chem_scanner.gemspec +43 -0
  16. data/lib/chem_scanner.rb +79 -0
  17. data/lib/chem_scanner/cdx.rb +67 -0
  18. data/lib/chem_scanner/cdxml.rb +72 -0
  19. data/lib/chem_scanner/chem_draw/cdx_reader.rb +101 -0
  20. data/lib/chem_scanner/chem_draw/node/base_node.rb +123 -0
  21. data/lib/chem_scanner/chem_draw/node/base_value.rb +257 -0
  22. data/lib/chem_scanner/chem_draw/node/bond.rb +100 -0
  23. data/lib/chem_scanner/chem_draw/node/bracket_attachment.rb +17 -0
  24. data/lib/chem_scanner/chem_draw/node/bracket_group.rb +32 -0
  25. data/lib/chem_scanner/chem_draw/node/chem_geometry.rb +58 -0
  26. data/lib/chem_scanner/chem_draw/node/color_table.rb +46 -0
  27. data/lib/chem_scanner/chem_draw/node/font_table.rb +54 -0
  28. data/lib/chem_scanner/chem_draw/node/fragment.rb +149 -0
  29. data/lib/chem_scanner/chem_draw/node/fragment_node.rb +145 -0
  30. data/lib/chem_scanner/chem_draw/node/graphic.rb +94 -0
  31. data/lib/chem_scanner/chem_draw/node/text.rb +242 -0
  32. data/lib/chem_scanner/chem_draw/parser.rb +214 -0
  33. data/lib/chem_scanner/chem_draw/yaml/cdx_objects.yaml +32 -0
  34. data/lib/chem_scanner/chem_draw/yaml/cdx_props.yaml +263 -0
  35. data/lib/chem_scanner/chem_draw/yaml/cdxml_objects.yaml +36 -0
  36. data/lib/chem_scanner/chem_draw/yaml/cdxml_props.yaml +263 -0
  37. data/lib/chem_scanner/chem_draw/yaml/props_data_type.yaml +263 -0
  38. data/lib/chem_scanner/configuration/abbreviation.rb +76 -0
  39. data/lib/chem_scanner/configuration/superatom.rb +76 -0
  40. data/lib/chem_scanner/configuration/superatom.txt +2874 -0
  41. data/lib/chem_scanner/configuration/util.rb +40 -0
  42. data/lib/chem_scanner/configuration/yaml/abbreviations.yaml +6399 -0
  43. data/lib/chem_scanner/configuration/yaml/elements.yaml +115 -0
  44. data/lib/chem_scanner/configuration/yaml/solvents.yaml +16 -0
  45. data/lib/chem_scanner/doc.rb +56 -0
  46. data/lib/chem_scanner/docx.rb +86 -0
  47. data/lib/chem_scanner/export/cml.rb +176 -0
  48. data/lib/chem_scanner/extension/element_map.rb +9 -0
  49. data/lib/chem_scanner/extension/geometry/bounding_box.rb +84 -0
  50. data/lib/chem_scanner/extension/geometry/line.rb +123 -0
  51. data/lib/chem_scanner/extension/geometry/point.rb +18 -0
  52. data/lib/chem_scanner/extension/geometry/polygon.rb +115 -0
  53. data/lib/chem_scanner/extension/geometry/segment.rb +196 -0
  54. data/lib/chem_scanner/extension/passthrough.rb +7 -0
  55. data/lib/chem_scanner/interpreter/element/arrow.rb +298 -0
  56. data/lib/chem_scanner/interpreter/element/atom.rb +134 -0
  57. data/lib/chem_scanner/interpreter/element/fragment.rb +59 -0
  58. data/lib/chem_scanner/interpreter/element/molecule.rb +473 -0
  59. data/lib/chem_scanner/interpreter/element/molecule_group.rb +34 -0
  60. data/lib/chem_scanner/interpreter/element/reaction.rb +186 -0
  61. data/lib/chem_scanner/interpreter/element/reaction_step.rb +39 -0
  62. data/lib/chem_scanner/interpreter/formula_to_mol.rb +75 -0
  63. data/lib/chem_scanner/interpreter/post_process/assemble.rb +38 -0
  64. data/lib/chem_scanner/interpreter/post_process/label_by_molecule.rb +37 -0
  65. data/lib/chem_scanner/interpreter/post_process/reaction_info.rb +225 -0
  66. data/lib/chem_scanner/interpreter/post_process/reaction_step.rb +95 -0
  67. data/lib/chem_scanner/interpreter/post_process/reagent_label.rb +46 -0
  68. data/lib/chem_scanner/interpreter/post_process/text_as_molecule.rb +52 -0
  69. data/lib/chem_scanner/interpreter/post_process/text_label.rb +40 -0
  70. data/lib/chem_scanner/interpreter/pre_process/arrow.rb +197 -0
  71. data/lib/chem_scanner/interpreter/pre_process/graphic.rb +41 -0
  72. data/lib/chem_scanner/interpreter/pre_process/molecule.rb +150 -0
  73. data/lib/chem_scanner/interpreter/reaction_detection/assign_to_reaction.rb +129 -0
  74. data/lib/chem_scanner/interpreter/reaction_detection/duplicate_reagents.rb +50 -0
  75. data/lib/chem_scanner/interpreter/reaction_detection/molecule_group.rb +55 -0
  76. data/lib/chem_scanner/interpreter/reaction_detection/multi_line_chain_reaction.rb +85 -0
  77. data/lib/chem_scanner/interpreter/reaction_detection/remove_separated_mol.rb +115 -0
  78. data/lib/chem_scanner/interpreter/reaction_detection/text_assignment.rb +166 -0
  79. data/lib/chem_scanner/interpreter/scheme.rb +173 -0
  80. data/lib/chem_scanner/interpreter/scheme_base.rb +64 -0
  81. data/lib/chem_scanner/interpreter/text_group/bold_groups.rb +183 -0
  82. data/lib/chem_scanner/interpreter/text_group/molecule_text_group.rb +138 -0
  83. data/lib/chem_scanner/interpreter/text_group/reaction_text_groups.rb +221 -0
  84. data/lib/chem_scanner/interpreter/text_group/retrieve_alias_info.rb +41 -0
  85. data/lib/chem_scanner/interpreter/text_group/retrieve_n_atoms.rb +106 -0
  86. data/lib/chem_scanner/interpreter/text_group/text_group_interpreter.rb +92 -0
  87. data/lib/chem_scanner/perkin_eln.rb +287 -0
  88. data/lib/chem_scanner/version.rb +5 -0
  89. data/lib/rubygems_plugin.rb +5 -0
  90. metadata +244 -0
@@ -0,0 +1,150 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChemScanner
4
+ module Interpreter
5
+ using Extension
6
+
7
+ module PreProcess
8
# Run the molecule pre-processing pipeline in its fixed order: polymer
# detection, fragment conversion, output-format population and finally
# ionic assembly.
#
# Returns whatever assemble_ionic_molecule returns.
def refine_molecules
  preparation_steps = %i[
    process_orbital_as_polymer
    fragment_to_molecules
    populate_molecule_info
  ]
  preparation_steps.each { |step| send(step) }

  assemble_ionic_molecule
end
15
+
16
# Mark fragment nodes that lie inside a matching graphic as polymer nodes.
#
# Only graphics with orbital_type 256 and oval_type 3 that have a polygon
# are considered (NOTE(review): the meaning of these two codes is not
# visible here - confirm against the ChemDraw property tables). For every
# node whose point is contained in such a polygon, the node is flagged via
# set_is_polymer and the owning fragment's polygon is merged with the
# graphic's polygon.
def process_orbital_as_polymer
  @graphic_map.each_value do |graphic|
    next if graphic.orbital_type != 256 || graphic.oval_type != 3

    outline = graphic.polygon
    next if outline.nil?

    @fragment_map.each_value do |fragment|
      fragment.node_map.each_value do |node|
        if outline.contains?(node.point)
          node.set_is_polymer
          fragment.polygon = fragment.polygon.merge_polygon(outline)
        end
      end
    end
  end
end
33
+
34
# Build the molecule maps from the parsed fragments.
#
# * Every fragment in @fragment_map that has at least one node becomes a
#   processed Molecule stored in @mol_map under the same key.
# * Every entry of @fragment_group_map becomes a MoleculeGroup (carrying
#   the entry's :title) stored in @mol_group_map under the same key.
def fragment_to_molecules
  @fragment_map.each do |fid, fragment|
    next if fragment.node_map.empty?

    @mol_map[fid] = Molecule.new(fragment).tap(&:process)
  end

  @fragment_group_map.each do |gid, fgroup|
    mgroup = MoleculeGroup.new
    mgroup.title = fgroup[:title]

    fgroup[:fragment_map].each do |_, fragment|
      # NOTE: nested fragment should not contain any special type.
      # For instance, there are some cases that
      # DMF is implicitly converted to C-C-C with nickname D-M-F
      next if fragment.node_map.any? { |_, node| node.type.positive? }

      mgroup.add_fragment(fragment)
    end

    @mol_group_map[gid] = mgroup
  end
end
58
+
59
# Call update_output_formats on every known molecule: first those in
# @mol_map, then the members of every group in @mol_group_map.
#
# Returns the combined list of molecules that were updated.
def populate_molecule_info
  standalone = @mol_map.values
  grouped = @mol_group_map.values.flat_map(&:molecules)

  (standalone + grouped).each(&:update_output_formats)
end
67
+
68
# Merge pairs of nearby, oppositely charged molecules into one molecule.
#
# Candidates are the molecules in @mol_map plus the sole molecule of every
# single-molecule group in @mol_group_map, provided the molecule has
# exactly one charged atom. Each candidate is paired with the closest
# candidate of exactly opposite charge, measured between bounding-box
# centers via Geometry.distance. Pairs further apart than max_distance are
# ignored, and when several candidates claim the same partner all of those
# claims are dropped.
#
# For every remaining pair the partner is added to the candidate, the
# candidate's output formats are refreshed, and the partner is removed
# from @mol_map / @mol_group_map. If the partner was a group, its title
# text is removed from @text_map as well.
#
# max_distance - largest center-to-center distance at which two ions are
#                still paired. Defaults to 4, the original estimated value.
#
# Returns the applied pairings as a Hash of candidate id => partner id.
def assemble_ionic_molecule(max_distance: 4)
  # Describe a molecule carrying exactly one charged atom, nil otherwise.
  describe = lambda do |mid, mol|
    charged_ids = mol.charged_atom_ids
    next nil unless charged_ids.size == 1

    aid = charged_ids.first
    { mol: mol, aid: aid, charge: mol.atom_map[aid].charge, mid: mid }
  end

  candidates = @mol_map.map { |mid, mol| describe.call(mid, mol) }
  @mol_group_map.each do |gid, group|
    next unless group.molecules.count == 1

    candidates.push(describe.call(gid, group.molecules.first))
  end
  candidates.compact!

  grouped = {}
  candidates.each do |info|
    mid = info[:mid]
    # An id already used on either side of a pairing is not paired again.
    next if grouped.key?(mid) || grouped.value?(mid)

    center = info[:mol].polygon.bounding_box.center
    nearest = nil
    nearest_dist = Float::INFINITY
    candidates.each do |other|
      next unless other[:charge] == -info[:charge]

      ocenter = other[:mol].polygon.bounding_box.center
      dist = Geometry.distance(center, ocenter)
      next unless dist < nearest_dist

      nearest = other
      nearest_dist = dist
    end
    next if nearest.nil? || nearest_dist > max_distance

    grouped[mid] = nearest[:mid]
  end

  # { a1 => b, a2 => b, a3 => c } then remove both a1 and a2
  claims = grouped.keys.group_by { |mid| grouped[mid] }
  contested = claims.values.select { |mids| mids.size > 1 }.flatten
  grouped.delete_if { |mid, _| contested.include?(mid) }

  # An id refers either to a standalone molecule or to a one-molecule group.
  find_mol = lambda do |id|
    @mol_map.key?(id) ? @mol_map[id] : @mol_group_map[id].molecules.first
  end

  grouped.each do |mid, partner_id|
    mol = find_mol.call(mid)
    mol.add(find_mol.call(partner_id))
    mol.update_output_formats

    @mol_map.delete(partner_id)
    removed_group = @mol_group_map.delete(partner_id)
    next if removed_group.nil?

    @text_map.delete(removed_group.title.id)
  end
end
148
+ end
149
+ end
150
+ end
@@ -0,0 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChemScanner
4
+ module Interpreter
5
+ using Extension
6
+
7
+ require "chem_scanner/interpreter/scheme_base"
8
+
9
+ module ReactionDetection
10
+ include ChemScanner::Interpreter::SchemeBase
11
+
12
# Create one Reaction per arrow and sort the non-boxed molecules of
# @mol_map into its reagents, reactants or products, based on
# detect_position.
#
# The work happens in three passes:
# 1. per arrow, rebuild all arrow polygons for each molecule and classify
#    the molecule; molecules with no position are remembered per arrow;
# 2. a molecule that is a reagent of one reaction and a reactant/product
#    of another is kept in only one of them, depending on its distance to
#    the reagent reaction's arrow;
# 3. after auto_fit_arrow_polygons, the remembered molecules are
#    classified once more.
#
# Returns the Hash of arrow id => ids that were unplaced after pass 1.
def assign_to_reaction
  # Push the id onto the list named by group; nil when group matches none.
  place = lambda do |reaction, group, mol_id|
    case group
    when "reagents" then reaction.reagent_ids.push(mol_id)
    when "reactants" then reaction.reactant_ids.push(mol_id)
    when "products" then reaction.product_ids.push(mol_id)
    end
  end

  undetected_molecules = {}

  @arrow_map.each do |arrow_id, arrow|
    reaction = Reaction.new
    reaction.arrow_id = arrow_id
    unplaced = []

    candidates = @mol_map.reject { |_, mol| mol.boxed }
    candidates.each do |mol_id, mol|
      mpoly = mol.polygon

      @arrow_map.each_value do |any_arrow|
        gap = any_arrow.min_distance_to_polygon(mpoly)
        any_arrow.build_polygons(mpoly.height + gap)
      end

      placed = place.call(reaction, detect_position(arrow, mpoly), mol_id)
      unplaced.push(mol_id) unless placed
    end

    @reactions.push(reaction)
    undetected_molecules[arrow_id] = unplaced unless unplaced.empty?
  end

  # Molecules which are both reagents and reactants/products
  # If reagent -> arrow distance in range, then consider as reagent
  # Otherwise, consider as reactant/product
  @reactions.each do |r|
    reagent_ids = r.reagent_ids
    arrow = @arrow_map[r.arrow_id]

    @reactions.each do |o|
      next if o.arrow_id == r.arrow_id

      shared = (reagent_ids & o.reactant_ids) + (reagent_ids & o.product_ids)
      shared.each do |cid|
        dist = arrow.min_distance_to_polygon(@mol_map[cid].polygon)
        if dist > 2
          r.delete_id(cid)
        else
          o.delete_id(cid)
        end
      end
    end
  end

  auto_fit_arrow_polygons

  undetected_molecules.each do |rkey, ids|
    reaction = @reactions.detect { |r| r.arrow_id == rkey }
    arrow = @arrow_map[rkey]

    ids.each do |id|
      place.call(reaction, detect_position(arrow, @mol_map[id].polygon), id)
    end
  end
end
81
+
82
# Classify a molecule polygon relative to an arrow.
#
# Returns "products" when check_position accepts the polygon on the head
# side and the arrow reports its center on the product side, "reactants"
# for the equivalent tail-side checks, "reagents" when the arrow reports
# the polygon as being around it, and nil otherwise. The checks are tried
# in that order.
def detect_position(arrow, mol_poly)
  center = mol_poly.center

  if check_position(mol_poly, arrow) && arrow.product_side?(center)
    "products"
  elsif check_position(mol_poly, arrow, false) && arrow.reactant_side?(center)
    "reactants"
  elsif arrow.polygon_around?(mol_poly)
    "reagents"
  end
end
95
+
96
# Check whether a molecule belongs to the reaction of the given arrow.
#
# The arrow's head segment (prod_side true) or tail segment (prod_side
# false) is extended to a line. The result is false when that line does
# not intersect mol_poly. Otherwise a segment from the arrow segment's
# point2 to the first intersection point is built, and the result is false
# if any other arrow both aims its own line at mol_poly and intersects that
# connecting segment. Other arrows whose head segment contains, or is
# contained in, this arrow's segment are ignored.
def check_position(mol_poly, arrow, prod_side = true)
  side_segment = lambda do |any_arrow|
    prod_side ? any_arrow.head_segment : any_arrow.tail_segment
  end

  segment = side_segment.call(arrow)
  line = segment.to_line
  return false unless line.intersects_with_polygon?(mol_poly)

  hit = line.intersection_points_with_polygon(mol_poly).first
  connector = Geometry::Segment.new(segment.point2, hit)

  blocked = @arrow_map.except(arrow.id).each_value.any? do |other|
    other_head = other.head_segment
    overlapping = other_head.contains_segment?(segment) ||
                  segment.contains_segment?(other_head)
    next false if overlapping

    side_segment.call(other).to_line.intersects_with_polygon?(mol_poly) &&
      other.all_intersects_with_segment?(connector)
  end

  !blocked
end
127
+ end
128
+ end
129
+ end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChemScanner
4
+ # Interpret the parsed/extracted geometry block
5
+ module Interpreter
6
+ using Extension
7
+
8
+ module ReactionDetection
9
# Resolve ids that appear among the reagents of more than one reaction.
#
# For every ordered pair of distinct reactions (r, other):
# * ids that are reactants or products of other are dropped from r's
#   reagents;
# * for each id still shared between both reagent lists, the object
#   (looked up in @mol_map, else @text_map) is compared against both
#   arrows via contains_point? on its polygon center. When both arrows
#   return a point and the center is further from r's point than from
#   other's, the id is scheduled for removal from r.
#
# The scheduled removals are applied with delete_id once all pairs have
# been examined. Returns the list of scheduled removals (rid / id).
def refine_duplicate_reagents
  delete_info = []

  @reactions.each do |r|
    arrow = @arrow_map[r.arrow_id]

    @reactions.each do |other|
      next if other.arrow_id == r.arrow_id

      r.reagent_ids -= (other.reactant_ids + other.product_ids)

      (r.reagent_ids & other.reagent_ids).each do |id|
        obj = @mol_map.key?(id) ? @mol_map[id] : @text_map[id]
        pcenter = obj.polygon.center

        apoint = arrow.contains_point?(pcenter)
        opoint = @arrow_map[other.arrow_id].contains_point?(pcenter)
        next if apoint.nil? || opoint.nil?

        rdist = pcenter.distance_to(apoint)
        odist = pcenter.distance_to(opoint)
        next unless rdist > odist

        delete_info.push(OpenStruct.new(rid: r.arrow_id, id: id))
      end
    end
  end

  delete_info.each do |info|
    owner = @reactions.detect { |r| r.arrow_id == info.rid }
    owner.delete_id(info.id)
  end
end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChemScanner
4
+ module Interpreter
5
+ using Extension
6
+
7
+ module ReactionDetection
8
+ require "chem_scanner/interpreter/reaction_detection/text_assignment"
9
+
10
# Attach titled single-molecule groups to reactions as reactants/products.
#
# A group qualifies when its key is not listed in any reaction arrow's
# text_arr, it holds exactly one molecule and that molecule is not boxed.
# The group's title polygon is classified against every reaction arrow
# with detect_position. Groups classified as "reagents" for any arrow, or
# with no reactant/product classification at all, are left alone.
# Otherwise the first reactant/product match wins: the title text is
# removed from @text_map and stored on the molecule, the group key is
# removed from every molecule's text_ids, and the molecule's fragment id
# is appended to the matching id list of that reaction.
def assign_molecule_group
  all_reagent_ids = @reactions.flat_map { |r| @arrow_map[r.arrow_id].text_arr }

  auto_fit_arrow_polygons

  candidates = @mol_group_map.select do |tid, mgroup|
    next false if all_reagent_ids.include?(tid)

    mgroup.molecules.count == 1 && !mgroup.molecules.first.boxed
  end

  candidates.each do |mkey, mgroup|
    mol = mgroup.molecules.first
    mmid = mol.fragment.id

    positions = @reactions.each_with_object({}) do |reaction, found|
      rid = reaction.arrow_id
      group = detect_position(@arrow_map[rid], mgroup.title.polygon)
      found[rid] = group unless group.nil?
    end

    next if positions.value?("reagents")

    match = positions.detect { |_, side| %w[reactants products].include?(side) }
    next if match.nil?

    rid, side = match

    # Don't need to keep it text_map anymore
    mol.text = @text_map.delete(mkey).value
    mol.text_ids.delete(mkey)
    @mol_map.each_value { |m| m.text_ids.delete(mkey) }

    # "reactants" -> reactant_ids, "products" -> product_ids
    reaction = @reactions.detect { |r| r.arrow_id == rid }
    reaction.send("#{side.chomp('s')}_ids").push(mmid)
  end
end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChemScanner
4
+ module Interpreter
5
+ using Extension
6
+
7
+ module ReactionDetection
8
# Complete reactions of a chain that wraps over several rows of arrows.
#
# Does nothing when check_reaction_orderring returns true or when every
# reaction already has both reactants and products. Otherwise the arrows
# are grouped into ordered rows by sort_arrow_map, and for each incomplete
# reaction:
# * with no reactants: the products of the last arrow in the previous row
#   are appended to its reactants;
# * otherwise (no products): the reactants of the first arrow in the next
#   row are appended to its products.
# A reaction in the first row has no previous row, and one in the last row
# has no next row; both are left unchanged.
#
# Returns the list of incomplete reactions, or nil when nothing was done.
def multi_line_chain_reaction
  return if check_reaction_orderring

  incomplete = @reactions.select do |r|
    r.reactant_ids.empty? || r.product_ids.empty?
  end
  return if incomplete.empty?

  auto_fit_arrow_polygons

  rows = sort_arrow_map
  reaction_for = ->(id) { @reactions.detect { |r| r.arrow_id == id } }

  incomplete.each do |reaction|
    row = rows.find_index { |ids| ids.include?(reaction.arrow_id) }
    next if row.nil?

    if reaction.reactant_ids.empty?
      # rows[-1] would silently wrap around to the last row, so the first
      # row must be skipped explicitly.
      next if row.zero?

      donor = reaction_for.call(rows[row - 1].last)
      reaction.reactant_ids.concat(donor.product_ids)
    else
      next_row = rows[row + 1]
      next if next_row.nil?

      donor = reaction_for.call(next_row.first)
      reaction.product_ids.concat(donor.reactant_ids)
    end
  end
end
47
+
48
# Decide whether multi_line_chain_reaction should bail out.
#
# Returns true when there are fewer than two arrows, or when any arrow has
# middle points or a head segment whose line is not horizontal; false
# otherwise.
def check_reaction_orderring
  return true if @arrow_map.count < 2

  @arrow_map.each_value.any? do |arrow|
    !arrow.middle_points.count.zero? ||
      !arrow.head_segment.to_line.horizontal?
  end
end
58
+
59
# Group arrow ids into rows and order them.
#
# Starting from the first remaining arrow, every remaining arrow whose
# head.y lies within that arrow's head.y +/- its height joins the same
# row. Ids inside a row are ordered by ascending head.x; rows are ordered
# by descending head.y of their first id.
#
# Returns an Array of Arrays of arrow ids.
def sort_arrow_map
  rows = []
  pending = @arrow_map.keys

  until pending.empty?
    anchor = @arrow_map[pending.first]
    lowest = anchor.head.y - anchor.height
    highest = anchor.head.y + anchor.height

    row, pending = pending.partition do |id|
      y_head = @arrow_map[id].head.y
      y_head >= lowest && y_head <= highest
    end

    rows.push(row)
  end

  rows.each { |row| row.sort_by! { |id| @arrow_map[id].head.x } }
  rows.sort_by { |row| -@arrow_map[row.first].head.y }
end
83
+ end
84
+ end
85
+ end