seimi 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,361 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Seimi
4
+ class Formula
5
+ class Parser
6
+ UPPER = /[A-Z]/.freeze
7
+ LOWER = /[a-z]/.freeze
8
+ DIGIT = /[0-9]/.freeze
9
+ SUPERSCRIPT_DIGITS = {
10
+ "⁰" => "0", "¹" => "1", "²" => "2", "³" => "3", "⁴" => "4",
11
+ "⁵" => "5", "⁶" => "6", "⁷" => "7", "⁸" => "8", "⁹" => "9"
12
+ }.freeze
13
+ SUPERSCRIPT_SIGNS = { "⁺" => "+", "⁻" => "-" }.freeze
14
+
15
# Remembers the formula text that #parse will work on.
#
# @param source [Object] formula text; coerced with Kernel#String and
#   stripped of surrounding whitespace
def initialize(source)
  text = String(source)
  @source = text.strip
end
18
+
19
# Parses the stored source into a Formula.
#
# The charge suffix is split off first, the remaining body is cut into
# hydrate segments, and every segment after the first is parsed as a hydrate.
#
# @return [Formula] built from the stripped source, the merged compositions,
#   the merged component list and the charge
def parse
  body, charge = split_charge(@source)
  segments = split_hydrate_segments(body)
  parsed = segments.map.with_index do |segment, position|
    parse_segment(segment, hydrate: position > 0)
  end

  composition = compact_composition(parsed, :composition)
  balance_composition = compact_composition(parsed, :balance_composition)
  components = compact_components(parsed.flat_map { |item| item[:components] })

  Formula.new(@source, composition, balance_composition, components, charge)
end
33
+
34
+ private
35
+
36
# Parses one hydrate segment and scales its contents by the hydrate factor.
#
# Resets the cursor state (@body, @index) before parsing.
#
# @param segment [String] segment text
# @param hydrate [Boolean] whether a leading integer is a hydrate factor
# @return [Hash] with :composition, :balance_composition and :components
# @raise [ParseError] when the segment cannot be parsed completely
def parse_segment(segment, hydrate:)
  factor, @body = extract_hydrate_factor(segment, hydrate)
  @index = 0

  elements, balance, parts = parse_group
  raise_invalid(current_char) unless at_end?

  {
    composition: scale_hash(elements, factor),
    balance_composition: scale_hash(balance, factor),
    components: scale_components(parts, factor)
  }
end
50
+
51
# Parses consecutive elements, isotopes and bracketed groups starting at the
# current cursor position.
#
# @param terminator [String, nil] closing bracket that ends this group, or
#   nil when parsing up to the end of the segment
# @return [Array] [composition, balance_composition, components]; when a
#   terminator is given the cursor is left on it (it is not consumed)
# @raise [ParseError] on an unexpected character, when input ends before the
#   terminator, or when a terminator-less parse yields no elements
def parse_group(terminator = nil)
  composition = {}
  balance_composition = {}
  components = []
  closed = false

  until at_end?
    char = current_char
    if terminator && char == terminator
      closed = true
      break
    end

    if char == "("
      merge_parsed_group(composition, balance_composition, components, parse_bracketed_group(")"), parse_count)
    elsif char == "["
      parse_square_open(composition, balance_composition, components)
    elsif char == "^"
      add_component(composition, balance_composition, components, parse_isotope_prefix, parse_count)
    elsif UPPER.match?(char)
      add_component(composition, balance_composition, components, parse_element_component, parse_count)
    else
      # Stray closing brackets and every other unsupported character end here.
      raise_invalid(char)
    end
  end

  unless closed
    # Input ran out: either a bracket was never closed or nothing was parsed.
    raise ParseError, MSG_UNCLOSED_PAREN if terminator
    raise_invalid("") if composition.empty?
  end

  [composition, balance_composition, compact_components(components)]
end
84
+
85
# Handles a "[" at the cursor: either an isotope such as "[13C]" (when the
# next character is a digit) or an ordinary square-bracketed group.
#
# @param composition [Hash] element counts, updated in place
# @param balance_composition [Hash] balance-symbol counts, updated in place
# @param components [Array] component list, appended to in place
def parse_square_open(composition, balance_composition, components)
  unless next_char&.match?(DIGIT)
    group = parse_bracketed_group("]")
    return merge_parsed_group(composition, balance_composition, components, group, parse_count)
  end

  add_component(composition, balance_composition, components, parse_isotope_bracket, parse_count)
  nil
end
95
+
96
# Parses a bracketed group, consuming both the opening bracket under the
# cursor and the matching closing bracket.
#
# @param terminator [String] the closing bracket expected for this group
# @return [Array] the [composition, balance_composition, components] triple
#   returned by parse_group
# @raise [ParseError] when the group is never closed, or when it is empty
#   (for example "Ca()2"), reported against the closing bracket
def parse_bracketed_group(terminator)
  advance # step past the opening bracket
  inner = parse_group(terminator)
  raise ParseError, MSG_UNCLOSED_PAREN if at_end?

  # An empty group contributes nothing, so accepting it would silently hide
  # a malformed formula; the cursor is on the closing bracket here.
  raise_invalid(current_char) if inner.first.empty?

  advance # step past the closing bracket
  inner
end
104
+
105
# Reads an element symbol at the cursor and builds its single-atom component.
#
# "D" and "T" are delegated to parse_hydrogen_isotope_alias; every other
# symbol must be a key of ELEMENTS, whose first record entry is used as the
# atomic mass.
#
# @return [Component] with count 1
# @raise [UnknownElementError] when the symbol is not in ELEMENTS
def parse_element_component
  symbol = parse_element_symbol

  if hydrogen_isotope_alias?(symbol)
    parse_hydrogen_isotope_alias(symbol)
  else
    validate_element!(symbol)
    mass = ELEMENTS.fetch(symbol).first
    Component.new(symbol: symbol, element: symbol, balance_symbol: symbol, count: 1, atomic_mass: mass)
  end
end
119
+
120
# Parses caret isotope notation such as "^13C"; the cursor is on the "^".
#
# @return [Component] isotope component with count 1
def parse_isotope_prefix
  advance # skip the caret
  parse_isotope_element(parse_required_digits)
end
125
+
126
# Parses bracketed isotope notation such as "[13C]"; the cursor is on the "[".
#
# @return [Component] isotope component with count 1
# @raise [ParseError] when input ends before "]" or another character
#   follows the element symbol
def parse_isotope_bracket
  advance # skip the opening bracket
  component = parse_isotope_element(parse_required_digits)
  raise ParseError, MSG_UNCLOSED_PAREN if at_end?

  closing = current_char
  raise_invalid(closing) if closing != "]"

  advance # skip the closing bracket
  component
end
136
+
137
# Reads the element symbol that follows an isotope mass number and builds
# the isotope component.
#
# @param mass_number [String] digits of the mass number
# @return [Component] labelled "<mass_number><symbol>", count 1
# @raise [UnknownElementError] when the symbol is not in ELEMENTS
# @raise [ParseError] when ISOTOPE_MASSES has no entry for the isotope
def parse_isotope_element(mass_number)
  symbol = parse_element_symbol
  validate_element!(symbol)
  mass = isotope_mass(symbol, mass_number)
  label = "#{mass_number}#{symbol}"

  Component.new(symbol: label, element: symbol, balance_symbol: label, count: 1, atomic_mass: mass)
end
150
+
151
# Builds the component for a hydrogen isotope alias.
#
# "D" maps to mass number 2; any other symbol passed here is treated as
# mass number 3 (callers only pass "D" or "T").
#
# @param symbol [String] the alias as written in the formula
# @return [Component] with element "H", balance symbol "2H" or "3H", count 1
def parse_hydrogen_isotope_alias(symbol)
  mass_number =
    case symbol
    when "D" then 2
    else 3
    end

  Component.new(
    symbol: symbol,
    element: "H",
    balance_symbol: "#{mass_number}H",
    count: 1,
    atomic_mass: isotope_mass("H", mass_number.to_s)
  )
end
162
+
163
# Consumes one character plus an optional following lowercase letter and
# returns them as a symbol.
#
# @return [String, nil] the symbol; nil when the cursor was already at the
#   end of the input (the cursor is still advanced by one)
def parse_element_symbol
  first = current_char
  advance
  return first if at_end?

  second = current_char
  return first unless second.match?(LOWER)

  advance
  first + second
end
172
+
173
# Consumes a mandatory run of decimal digits (an isotope mass number).
#
# @return [String] the digits as written
# @raise [ParseError] when no digit is present (reported against the
#   character found instead, or "" at end of input) or when the number is
#   zero (reported against the digits themselves, as parse_count does)
def parse_required_digits
  digits = +""
  until at_end? || !current_char.match?(DIGIT)
    digits << current_char
    advance
  end

  raise_invalid(current_char || "") if digits.empty?
  # The cursor has moved past the digits, so name the zero itself rather
  # than whatever character happens to follow it.
  raise_invalid(digits) if digits.to_i.zero?

  digits
end
184
+
185
# Consumes an optional run of digits giving a multiplicity.
#
# @return [Integer] the count, or 1 when no digits are present
# @raise [ParseError] when the digits evaluate to zero
def parse_count
  digits = +""
  until at_end?
    char = current_char
    break unless char.match?(DIGIT)

    digits << char
    advance
  end
  return 1 if digits.empty?

  digits.to_i.tap { |count| raise_invalid(digits) if count.zero? }
end
199
+
200
# Records `factor` atoms of a component in both tallies and appends a copy
# of the component carrying that count.
#
# @param composition [Hash] counts keyed by element, updated in place
# @param balance_composition [Hash] counts keyed by balance symbol, updated
#   in place
# @param components [Array] component list, appended to in place
# @param component [Component] the parsed component
# @param factor [Integer] number of atoms to record
# @return [Array] the components array
def add_component(composition, balance_composition, components, component, factor)
  [
    [composition, component.element],
    [balance_composition, component.balance_symbol]
  ].each do |tally, key|
    merge_count(tally, key, factor)
  end

  components.push(component_with_count(component, factor))
end
205
+
206
# Folds a parsed inner group into the enclosing tallies, multiplying every
# count by the group's multiplicity.
#
# @param composition [Hash] outer element counts, updated in place
# @param balance_composition [Hash] outer balance counts, updated in place
# @param components [Array] outer component list, extended in place
# @param parsed [Array] [composition, balance_composition, components] of
#   the inner group
# @param factor [Integer] multiplicity of the group
# @return [Array] the components array
def merge_parsed_group(composition, balance_composition, components, parsed, factor)
  inner_composition, inner_balance, inner_components = parsed

  [[inner_composition, composition], [inner_balance, balance_composition]].each do |from, into|
    from.each { |symbol, count| merge_count(into, symbol, count * factor) }
  end

  components.concat(scale_components(inner_components, factor))
end
212
+
213
# Adds `count` to the tally stored under `key`, starting from 0 when the
# key is absent.
#
# @param hash [Hash] tally, updated in place
# @param key [Object] tally key
# @param count [Numeric] amount to add
# @return [Numeric] the new total for the key
def merge_count(hash, key, count)
  previous = hash.key?(key) ? hash[key] : 0
  hash[key] = previous + count
end
216
+
217
# Ensures the symbol is a key of ELEMENTS.
#
# @param symbol [String, nil] candidate element symbol
# @return [nil]
# @raise [UnknownElementError] when ELEMENTS has no such key
def validate_element!(symbol)
  raise UnknownElementError, format(MSG_UNKNOWN_ELEMENT, symbol) unless ELEMENTS.key?(symbol)
end
222
+
223
# Looks up the mass of an isotope in ISOTOPE_MASSES.
#
# @param symbol [String] element symbol
# @param mass_number [String, Integer] mass number; converted with to_i for
#   the lookup
# @return [Object] the stored mass
# @raise [ParseError] when no mass is recorded for the isotope
def isotope_mass(symbol, mass_number)
  ISOTOPE_MASSES.dig(symbol, mass_number.to_i) ||
    raise(ParseError, format(MSG_PARSE_INVALID_CHAR, "#{mass_number}#{symbol}"))
end
229
+
230
# Tells whether the symbol is one of the hydrogen isotope aliases "D" or "T".
#
# @param symbol [String, nil]
# @return [Boolean]
def hydrogen_isotope_alias?(symbol)
  %w[D T].include?(symbol)
end
233
+
234
# Separates a trailing charge annotation from the formula body.
#
# Recognised forms, tried in this order:
# 1. caret notation: "Fe^3+", "Na^+"
# 2. superscript notation: "SO4²⁻", "Na⁺"
# 3. a bare trailing sign; digits directly before it count as the charge
#    magnitude only when what precedes them is a single known element
#    ("Fe3+"), otherwise they stay in the body and the charge is ±1.
#
# @param source [String] stripped formula text
# @return [Array(String, Integer)] body without the charge, and the charge
def split_charge(source)
  return [source, 0] if source.empty?

  if (caret = /\A(.+)\^(\d*)([+-])\z/.match(source))
    return [caret[1], signed_charge(caret[2], caret[3])]
  end

  superscript_pattern = /\A(.+?)([#{SUPERSCRIPT_DIGITS.keys.join}]*)([#{SUPERSCRIPT_SIGNS.keys.join}])\z/
  if (superscript = superscript_pattern.match(source))
    body, raw_digits, raw_sign = superscript.captures
    digits = raw_digits.each_char.map { |char| SUPERSCRIPT_DIGITS.fetch(char) }.join
    return [body, signed_charge(digits, SUPERSCRIPT_SIGNS.fetch(raw_sign))]
  end

  return [source, 0] unless source.end_with?("+", "-")

  body, sign = source[0...-1], source[-1]
  bare_charge = /\A(.+?)(\d+)\z/.match(body)
  return [bare_charge[1], signed_charge(bare_charge[2], sign)] if bare_charge && single_element_body?(bare_charge[1])

  [body, signed_charge("", sign)]
end
257
+
258
# Converts charge digits and a sign into a signed integer.
#
# @param digits [String] magnitude digits; empty means a magnitude of 1
# @param sign [String] "-" for a negative charge; anything else is positive
# @return [Integer]
def signed_charge(digits, sign)
  magnitude = digits.empty? ? 1 : digits.to_i
  return -magnitude if sign == "-"

  magnitude
end
262
+
263
# Tells whether the text is exactly one element symbol present in ELEMENTS.
#
# @param body [String]
# @return [Boolean]
def single_element_body?(body)
  return false unless body.match?(/\A[A-Z][a-z]?\z/)

  ELEMENTS.key?(body)
end
266
+
267
# Splits a formula body on "." or "·" separators that are not nested
# inside round or square brackets.
#
# @param source [String] formula body without its charge
# @return [Array<String>] the segments, in order
# @raise [ParseError] when any segment is empty
def split_hydrate_segments(source)
  segments = [+""]
  depth = 0

  source.each_char do |char|
    case char
    when ".", "·"
      if depth.zero?
        # A top-level separator closes the current segment.
        segments.push(+"")
        next
      end
    when "(", "["
      depth += 1
    when ")", "]"
      depth -= 1
    end

    segments.last << char
  end

  raise_invalid("") if segments.any?(&:empty?)

  segments
end
289
+
290
# Splits a leading multiplier off a hydrate segment ("5H2O" -> [5, "H2O"]).
#
# @param segment [String] segment text
# @param hydrate [Boolean] only hydrate segments may carry a multiplier;
#   others are returned unchanged with a factor of 1
# @return [Array(Integer, String)] the factor and the remaining body
# @raise [ParseError] when the multiplier is zero, which would otherwise
#   produce zero-count entries (element counts of zero are rejected the
#   same way by parse_count)
def extract_hydrate_factor(segment, hydrate)
  return [1, segment] unless hydrate

  match = segment.match(/\A(\d+)(.+)\z/)
  return [1, segment] unless match

  factor = match[1].to_i
  raise_invalid(match[1]) if factor.zero?

  [factor, match[2]]
end
298
+
299
# Sums one of the per-segment tallies across all parsed segments.
#
# @param parsed [Array<Hash>] per-segment results from parse_segment
# @param key [Symbol] which tally to sum (:composition or
#   :balance_composition)
# @return [Hash] combined counts
def compact_composition(parsed, key)
  totals = {}
  parsed.each do |item|
    item.fetch(key).each { |symbol, count| merge_count(totals, symbol, count) }
  end
  totals
end
304
+
305
# Merges components that agree on symbol, element, balance symbol and
# atomic mass, adding their counts and keeping first-appearance order.
#
# The input components are not modified; the result holds duplicates.
#
# @param components [Array<Component>]
# @return [Array<Component>]
def compact_components(components)
  merged = []

  components.each do |component|
    twin = merged.find do |candidate|
      %i[symbol element balance_symbol atomic_mass].all? do |field|
        candidate.public_send(field) == component.public_send(field)
      end
    end

    if twin
      twin.count += component.count
    else
      merged << component.dup
    end
  end

  merged
end
321
+
322
# Returns a new hash with every count multiplied by the factor.
#
# @param hash [Hash] counts keyed by symbol
# @param factor [Numeric]
# @return [Hash]
def scale_hash(hash, factor)
  hash.map { |symbol, count| [symbol, count * factor] }.to_h
end
325
+
326
# Returns copies of the components with each count multiplied by the factor.
#
# @param components [Array<Component>]
# @param factor [Integer]
# @return [Array<Component>]
def scale_components(components, factor)
  components.each_with_object([]) do |component, scaled|
    scaled << component_with_count(component, component.count * factor)
  end
end
329
+
330
# Builds a new Component identical to the given one except for its count.
#
# @param component [Component] component to copy
# @param count [Integer] count for the copy
# @return [Component]
def component_with_count(component, count)
  copied = %i[symbol element balance_symbol atomic_mass].map do |field|
    [field, component.public_send(field)]
  end.to_h

  Component.new(**copied, count: count)
end
339
+
340
# Raises a ParseError naming the offending text.
#
# @param char [String, nil] text inserted into MSG_PARSE_INVALID_CHAR
# @raise [ParseError] always
def raise_invalid(char)
  message = format(MSG_PARSE_INVALID_CHAR, char)
  raise ParseError, message
end
343
+
344
# Character under the cursor.
#
# @return [String, nil] nil once the cursor is past the end of @body
def current_char
  @body.slice(@index)
end
347
+
348
# Character immediately after the cursor, without moving it.
#
# @return [String, nil] nil when there is no such character
def next_char
  @body.slice(@index.succ)
end
351
+
352
# Moves the cursor one character forward.
#
# @return [Integer] the new cursor index
def advance
  @index = @index.succ
end
355
+
356
# Tells whether the cursor has moved past the last character of @body.
#
# @return [Boolean]
def at_end?
  @body.length <= @index
end
359
+ end
360
+ end
361
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "elements"
4
+ require_relative "errors"
5
+ require_relative "isotopes"
6
+
7
+ module Seimi
8
+ class Formula
9
# One kind of atom in a formula: the symbol as written, the element it
# belongs to, the symbol used for balancing, how many there are, and the
# mass of a single atom.
Component = Struct.new(*%i[symbol element balance_symbol count atomic_mass], keyword_init: true) do
  # @return [Numeric] mass contributed by all atoms of this component
  def total_mass
    single = atomic_mass
    single * count
  end
end
14
+
15
+ attr_reader :composition, :balance_composition, :charge
16
+
17
# Parses formula text into a Formula by delegating to Parser.
#
# @param source [Object] formula text handed to Parser.new
# @return [Formula] whatever Parser#parse returns
def self.parse(source)
  parser = Parser.new(source)
  parser.parse
end
20
+
21
# Stores the parsed pieces of a formula.
#
# @param source [String] formula text, returned by #to_s
# @param composition [Hash] counts keyed by element
# @param balance_composition [Hash] counts keyed by balance symbol
# @param components [Array<Component>] per-component breakdown
# @param charge [Integer] net charge
def initialize(source, composition, balance_composition, components, charge)
  @source, @composition, @balance_composition = source, composition, balance_composition
  @components, @charge = components, charge
end
28
+
29
# Total mass of all components.
#
# @return [Float] sum of each component's total_mass; 0.0 when there are
#   no components
def molar_mass
  @components.sum(0.0, &:total_mass)
end
32
+
33
# Per-component summary of the formula.
#
# @return [Array<Array>] one [symbol, count, total_mass] triple per component
def breakdown
  @components.each_with_object([]) do |component, rows|
    rows << [component.symbol, component.count, component.total_mass]
  end
end
38
+
39
# Tells whether the formula carries a non-zero net charge.
#
# @return [Boolean]
def charged?
  return false if charge.zero?

  true
end
42
+
43
+ def to_s
44
+ @source
45
+ end
46
+
47
# Renders a multi-line Japanese-language report of the formula.
#
# The report holds a header, one line per component (the last entry of the
# element's ELEMENTS record, the symbol, the total mass and the count), an
# optional charge line, the molar mass, a Sangi rendering of the rounded
# molar mass and a closing line.
#
# @return [String] report lines joined with "\n"
def to_kobun
  # Loaded lazily: only this method needs the rendering helpers.
  require_relative "kanji"
  require_relative "sangi"

  report = ["〔#{self} 解剖の覚〕"]
  report.concat(
    @components.map do |component|
      name = ELEMENTS.fetch(component.element).last
      "一、#{name}(#{component.symbol}) #{Kanji.decimal(component.total_mass, 3)} 匁掛ける#{Kanji.from_i(component.count)}つ"
    end
  )
  report << "電荷 #{Kanji.rational(charge)} に候" if charged?
  report << "〆て 分子の量 凡そ#{Kanji.decimal(molar_mass, 3)} に候"
  report << Sangi.render(molar_mass.round)
  report << "右の如く相違なく候 也"
  report.join("\n")
end
62
+ end
63
+ end
64
+
65
+ require_relative "formula/parser"