seimi 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +111 -0
- data/Rakefile +10 -0
- data/exe/seimi +9 -0
- data/lib/seimi/cli.rb +63 -0
- data/lib/seimi/elements.rb +124 -0
- data/lib/seimi/equation.rb +175 -0
- data/lib/seimi/errors.rb +15 -0
- data/lib/seimi/formula/parser.rb +361 -0
- data/lib/seimi/formula.rb +65 -0
- data/lib/seimi/isotopes.rb +1115 -0
- data/lib/seimi/kanji.rb +118 -0
- data/lib/seimi/sangi.rb +66 -0
- data/lib/seimi/version.rb +5 -0
- data/lib/seimi.rb +15 -0
- data/test/test_buntai.rb +26 -0
- data/test/test_cli.rb +25 -0
- data/test/test_equation.rb +73 -0
- data/test/test_formula.rb +78 -0
- data/test/test_helper.rb +6 -0
- data/test/test_kanji.rb +20 -0
- metadata +94 -0
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Seimi
|
|
4
|
+
class Formula
|
|
5
|
+
class Parser
|
|
6
|
+
# Single-character classes used while scanning a formula.
UPPER = /[A-Z]/.freeze
LOWER = /[a-z]/.freeze
DIGIT = /[0-9]/.freeze

# Unicode superscript digits mapped to their ASCII digit, in order 0..9.
SUPERSCRIPT_DIGITS = %w[⁰ ¹ ² ³ ⁴ ⁵ ⁶ ⁷ ⁸ ⁹]
  .each_with_index
  .map { |glyph, value| [glyph, value.to_s.freeze] }
  .to_h
  .freeze

# Unicode superscript signs mapped to their ASCII sign.
SUPERSCRIPT_SIGNS = { "⁺" => "+", "⁻" => "-" }.freeze
|
|
14
|
+
|
|
15
|
+
# Stores the formula text to parse.
#
# @param source [Object] raw formula; coerced with Kernel#String and
#   stripped of leading/trailing whitespace
def initialize(source)
  text = String(source)
  @source = text.strip
end
|
|
18
|
+
|
|
19
|
+
# Parses the stored source into a Formula.
#
# The charge is split off first, the remainder is cut into segments at
# top-level "." / "·", and every segment after the first is parsed as a
# hydrate part. The per-segment results are then merged.
#
# @return [Formula]
def parse
  formula_body, charge = split_charge(@source)
  segments = split_hydrate_segments(formula_body)
  parsed = segments.map.with_index do |segment, position|
    parse_segment(segment, hydrate: position.positive?)
  end
  all_components = parsed.flat_map { |entry| entry[:components] }

  Formula.new(
    @source,
    compact_composition(parsed, :composition),
    compact_composition(parsed, :balance_composition),
    compact_components(all_components),
    charge
  )
end
|
|
33
|
+
|
|
34
|
+
private
|
|
35
|
+
|
|
36
|
+
# Parses one hydrate segment and scales the result by its leading factor.
#
# @param segment [String] text between top-level hydrate separators
# @param hydrate [Boolean] whether a leading integer is a hydrate multiplier
# @return [Hash] :composition, :balance_composition and :components,
#   each already multiplied by the segment factor
def parse_segment(segment, hydrate:)
  multiplier, text = extract_hydrate_factor(segment, hydrate)

  # Reset the cursor that the character-level helpers read.
  @body = text
  @index = 0

  elements, balance_elements, parts = parse_group
  raise_invalid(current_char) unless at_end?

  {
    composition: scale_hash(elements, multiplier),
    balance_composition: scale_hash(balance_elements, multiplier),
    components: scale_components(parts, multiplier)
  }
end
|
|
50
|
+
|
|
51
|
+
# Parses a run of elements, isotopes and nested groups starting at the
# current cursor position.
#
# @param terminator [String, nil] closing bracket that ends this group, or
#   nil for the top-level group, which runs to the end of the segment
# @return [Array] [composition, balance_composition, components]; when a
#   terminator is given the cursor is left ON the terminator
# @raise [ParseError] on an unexpected character, an unclosed group, or an
#   empty group (top-level or bracketed)
def parse_group(terminator = nil)
  composition = {}
  balance_composition = {}
  components = []

  until at_end?
    char = current_char
    if terminator && char == terminator
      # "()" / "[]" carry no atoms; reject them just like an empty
      # top-level formula instead of silently accepting them.
      raise_invalid(char) if composition.empty?

      return [composition, balance_composition, compact_components(components)]
    end

    case char
    when "("
      inner = parse_bracketed_group(")")
      merge_parsed_group(composition, balance_composition, components, inner, parse_count)
    when "["
      parse_square_open(composition, balance_composition, components)
    when ")", "]"
      # A closing bracket that does not match the expected terminator.
      raise_invalid(char)
    when "^"
      component = parse_isotope_prefix
      add_component(composition, balance_composition, components, component, parse_count)
    when UPPER
      component = parse_element_component
      add_component(composition, balance_composition, components, component, parse_count)
    else
      raise_invalid(char)
    end
  end

  # Input ran out before the terminator was seen.
  raise ParseError, MSG_UNCLOSED_PAREN if terminator
  raise_invalid("") if composition.empty?

  [composition, balance_composition, compact_components(components)]
end
|
|
84
|
+
|
|
85
|
+
# Handles a "[" at the cursor: "[<digit>..." is an isotope such as "[13C]",
# anything else is an ordinary bracketed group.
#
# The parsed result is merged into the three accumulators in place.
def parse_square_open(composition, balance_composition, components)
  unless next_char&.match?(DIGIT)
    group = parse_bracketed_group("]")
    return merge_parsed_group(composition, balance_composition, components, group, parse_count)
  end

  isotope = parse_isotope_bracket
  add_component(composition, balance_composition, components, isotope, parse_count)
  nil
end
|
|
95
|
+
|
|
96
|
+
# Parses a group enclosed in brackets; the cursor must be on the opening
# bracket and ends just past the closing one.
#
# @param terminator [String] the closing bracket to expect
# @return [Array] the result of parse_group for the bracket's contents
# @raise [ParseError] when the input ends before the closing bracket
def parse_bracketed_group(terminator)
  advance # step over the opening bracket
  parse_group(terminator).tap do
    raise ParseError, MSG_UNCLOSED_PAREN if at_end?

    advance # step over the closing bracket
  end
end
|
|
104
|
+
|
|
105
|
+
# Reads an element symbol at the cursor and builds a count-1 Component.
#
# "D" and "T" are delegated to the hydrogen-isotope alias handler; any
# other symbol is validated and takes the first entry of its ELEMENTS
# record as atomic mass.
#
# @return [Component]
def parse_element_component
  symbol = parse_element_symbol

  if hydrogen_isotope_alias?(symbol)
    parse_hydrogen_isotope_alias(symbol)
  else
    validate_element!(symbol)
    standard_mass = ELEMENTS.fetch(symbol).first
    Component.new(symbol: symbol, element: symbol, balance_symbol: symbol, count: 1, atomic_mass: standard_mass)
  end
end
|
|
119
|
+
|
|
120
|
+
# Parses caret isotope notation such as "^13C"; the cursor must be on "^".
#
# @return [Component]
def parse_isotope_prefix
  advance # skip the "^" marker
  parse_isotope_element(parse_required_digits)
end
|
|
125
|
+
|
|
126
|
+
# Parses bracketed isotope notation such as "[13C]"; the cursor must be on
# "[" and ends just past the closing "]".
#
# @return [Component]
# @raise [ParseError] when the input ends early or the isotope is not
#   followed by "]"
def parse_isotope_bracket
  advance # skip "["
  isotope = parse_isotope_element(parse_required_digits)
  raise ParseError, MSG_UNCLOSED_PAREN if at_end?

  closing = current_char
  raise_invalid(closing) unless closing == "]"

  advance # skip "]"
  isotope
end
|
|
136
|
+
|
|
137
|
+
# Reads the element symbol that follows a mass number and builds a count-1
# isotope Component labelled "<mass_number><symbol>".
#
# @param mass_number [String] digits read before the symbol
# @return [Component] with the mass returned by isotope_mass
def parse_isotope_element(mass_number)
  symbol = parse_element_symbol
  validate_element!(symbol)

  attributes = {
    symbol: "#{mass_number}#{symbol}",
    element: symbol,
    balance_symbol: "#{mass_number}#{symbol}",
    count: 1,
    atomic_mass: isotope_mass(symbol, mass_number)
  }
  Component.new(**attributes)
end
|
|
150
|
+
|
|
151
|
+
# Builds the count-1 Component for a hydrogen isotope alias.
#
# "D" maps to mass number 2; every other symbol passed here is treated as
# mass number 3. The displayed symbol stays the alias, while the element is
# "H" and the balance symbol is "<mass_number>H".
#
# @param symbol [String] the alias that was read
# @return [Component]
def parse_hydrogen_isotope_alias(symbol)
  mass_number = { "D" => 2 }.fetch(symbol, 3)

  Component.new(
    symbol: symbol,
    element: "H",
    balance_symbol: "#{mass_number}H",
    count: 1,
    atomic_mass: isotope_mass("H", mass_number.to_s)
  )
end
|
|
162
|
+
|
|
163
|
+
# Consumes one character plus an optional following lowercase letter and
# returns them as the element symbol (e.g. "H", "Cl").
#
# @return [String, nil] nil when called with the cursor at the end of input
def parse_element_symbol
  symbol = current_char
  advance
  return symbol if at_end? || !current_char.match?(LOWER)

  second = current_char
  advance
  symbol + second
end
|
|
172
|
+
|
|
173
|
+
# Reads the run of ASCII digits at the cursor (an isotope mass number).
#
# @return [String] the digits exactly as written (leading zeros are kept)
# @raise [ParseError] when no digit is present, or when the digits
#   evaluate to zero
def parse_required_digits
  digits = +""
  while !at_end? && current_char.match?(DIGIT)
    digits << current_char
    advance
  end

  # No digits at all: the offending token is whatever follows (or the end
  # of input, reported as an empty string).
  raise_invalid(current_char || "") if digits.empty?
  # A zero mass number: report the digits themselves rather than the
  # unrelated character after them, matching how parse_count reports a
  # zero count.
  raise_invalid(digits) if digits.to_i.zero?

  digits
end
|
|
184
|
+
|
|
185
|
+
# Reads an optional multiplicity (run of ASCII digits) at the cursor.
#
# @return [Integer] the parsed count, or 1 when no digits are present
# @raise [ParseError] when the digits evaluate to zero
def parse_count
  digits = +""
  until at_end? || !current_char.match?(DIGIT)
    digits << current_char
    advance
  end
  return 1 if digits.empty?

  digits.to_i.tap { |count| raise_invalid(digits) if count.zero? }
end
|
|
199
|
+
|
|
200
|
+
# Records `factor` occurrences of a component in all three accumulators.
#
# The element tally is keyed by component.element, the balance tally by
# component.balance_symbol, and a copy of the component carrying `factor`
# as its count is appended to `components`.
def add_component(composition, balance_composition, components, component, factor)
  tallies = [
    [composition, component.element],
    [balance_composition, component.balance_symbol]
  ]
  tallies.each { |tally, key| merge_count(tally, key, factor) }

  components.push(component_with_count(component, factor))
end
|
|
205
|
+
|
|
206
|
+
# Folds the result of a nested group into the enclosing accumulators,
# multiplying every count by the group's trailing `factor`.
#
# @param parsed [Array] [composition, balance_composition, components] of
#   the nested group
# @return [Array] the (mutated) `components` array
def merge_parsed_group(composition, balance_composition, components, parsed, factor)
  group_elements, group_balance, group_parts = parsed

  [[group_elements, composition], [group_balance, balance_composition]].each do |inner, outer|
    inner.each { |symbol, count| merge_count(outer, symbol, count * factor) }
  end

  components.concat(scale_components(group_parts, factor))
end
|
|
212
|
+
|
|
213
|
+
# Adds `count` to the tally stored under `key`, starting from 0 when the
# key is absent.
#
# @return [Numeric] the updated tally
def merge_count(hash, key, count)
  hash.store(key, hash.fetch(key, 0) + count)
end
|
|
216
|
+
|
|
217
|
+
# Ensures `symbol` is a key of ELEMENTS.
#
# @return [nil]
# @raise [UnknownElementError] when the symbol is not a known element
def validate_element!(symbol)
  raise UnknownElementError, format(MSG_UNKNOWN_ELEMENT, symbol) unless ELEMENTS.key?(symbol)
end
|
|
222
|
+
|
|
223
|
+
# Looks up the mass of an isotope in ISOTOPE_MASSES.
#
# @param symbol [String] element symbol
# @param mass_number [String, Integer] converted with #to_i for the lookup
# @return [Object] the stored mass
# @raise [ParseError] when the table has no entry for this isotope
def isotope_mass(symbol, mass_number)
  ISOTOPE_MASSES.dig(symbol, mass_number.to_i) ||
    raise(ParseError, format(MSG_PARSE_INVALID_CHAR, "#{mass_number}#{symbol}"))
end
|
|
229
|
+
|
|
230
|
+
# @return [Boolean] true when `symbol` is "D" or "T"
def hydrogen_isotope_alias?(symbol)
  %w[D T].include?(symbol)
end
|
|
233
|
+
|
|
234
|
+
# Separates a trailing charge annotation from the formula text.
#
# Forms are tried in this order:
#   1. caret form        "SO4^2-", "Na^+"
#   2. superscript form  "Fe³⁺", "Cl⁻"
#   3. bare trailing sign "OH-"; digits directly before the sign count as
#      the magnitude only when what precedes them is a single known element
#      ("Fe3+"), otherwise they stay part of the formula ("NH4+").
#
# @param source [String]
# @return [Array] [formula_body, charge]; charge is 0 when no form matches
def split_charge(source)
  return [source, 0] if source.empty?

  if (caret = source.match(/\A(.+)\^(\d*)([+-])\z/))
    return [caret[1], signed_charge(caret[2], caret[3])]
  end

  if (superscript = source.match(/\A(.+?)([#{SUPERSCRIPT_DIGITS.keys.join}]*)([#{SUPERSCRIPT_SIGNS.keys.join}])\z/))
    # Every matched glyph is a key of SUPERSCRIPT_DIGITS, so a character
    # translation yields the same ASCII digits as a per-character lookup.
    ascii_digits = superscript[2].tr(SUPERSCRIPT_DIGITS.keys.join, SUPERSCRIPT_DIGITS.values.join)
    ascii_sign = SUPERSCRIPT_SIGNS.fetch(superscript[3])
    return [superscript[1], signed_charge(ascii_digits, ascii_sign)]
  end

  return [source, 0] unless source.end_with?("+", "-")

  body = source[0...-1]
  sign = source[-1]
  bare = body.match(/\A(.+?)(\d+)\z/)
  return [bare[1], signed_charge(bare[2], sign)] if bare && single_element_body?(bare[1])

  [body, signed_charge("", sign)]
end
|
|
257
|
+
|
|
258
|
+
# Converts a digit string and a sign into a signed integer charge.
#
# @param digits [String] magnitude digits; an empty string means 1
# @param sign [String] "-" yields a negative value, anything else positive
# @return [Integer]
def signed_charge(digits, sign)
  magnitude = 1
  magnitude = digits.to_i unless digits.empty?
  return -magnitude if sign == "-"

  magnitude
end
|
|
262
|
+
|
|
263
|
+
# @return [Boolean] true when `body` is exactly one capital letter with an
#   optional lowercase letter AND is a key of ELEMENTS
def single_element_body?(body)
  return false unless body.match?(/\A[A-Z][a-z]?\z/)

  ELEMENTS.key?(body)
end
|
|
266
|
+
|
|
267
|
+
# Splits the formula at "." / "·" separators that sit outside brackets
# (bracket depth zero).
#
# @param source [String]
# @return [Array<String>] the segments in order
# @raise [ParseError] when any segment is empty (including empty input)
def split_hydrate_segments(source)
  segments = [+""]
  depth = 0

  source.each_char do |char|
    if depth.zero? && [".", "·"].include?(char)
      segments << +""
      next
    end

    case char
    when "(", "[" then depth += 1
    when ")", "]" then depth -= 1
    end
    segments.last << char
  end

  raise_invalid("") if segments.any?(&:empty?)

  segments
end
|
|
289
|
+
|
|
290
|
+
# Splits a leading integer multiplier off a hydrate segment ("5H2O").
#
# @param segment [String] one segment of the formula
# @param hydrate [Boolean] false for the first segment, whose leading
#   digits are never treated as a multiplier
# @return [Array] [factor, remaining_text]; factor is 1 when the segment
#   is not a hydrate or has no leading digits followed by more text
# @raise [ParseError] when the multiplier is zero
def extract_hydrate_factor(segment, hydrate)
  return [1, segment] unless hydrate

  match = segment.match(/\A(\d+)(.+)\z/)
  return [1, segment] unless match

  factor = match[1].to_i
  # A zero multiplier ("CuSO4.0H2O") would silently yield zero-count
  # components; reject it the same way parse_count rejects a zero count.
  raise_invalid(match[1]) if factor.zero?

  [factor, match[2]]
end
|
|
298
|
+
|
|
299
|
+
# Sums one of the per-segment tallies across all parsed segments.
#
# @param parsed [Array<Hash>] per-segment results
# @param key [Symbol] which tally to combine
# @return [Hash] symbol => total count
def compact_composition(parsed, key)
  totals = {}
  parsed.each do |item|
    item.fetch(key).each { |symbol, count| merge_count(totals, symbol, count) }
  end
  totals
end
|
|
304
|
+
|
|
305
|
+
# Merges components that agree on symbol, element, balance symbol and
# atomic mass by summing their counts; first-seen order is kept.
#
# The input components are not modified: the result holds copies.
#
# @param components [Array<Component>]
# @return [Array<Component>]
def compact_components(components)
  same_identity = lambda do |left, right|
    left.symbol == right.symbol &&
      left.element == right.element &&
      left.balance_symbol == right.balance_symbol &&
      left.atomic_mass == right.atomic_mass
  end

  merged = []
  components.each do |component|
    match = merged.find { |candidate| same_identity.call(candidate, component) }
    if match
      match.count += component.count
    else
      merged << component.dup
    end
  end
  merged
end
|
|
321
|
+
|
|
322
|
+
# Returns a new hash with every count multiplied by `factor`.
#
# @param hash [Hash] symbol => count
# @param factor [Numeric]
# @return [Hash]
def scale_hash(hash, factor)
  hash.each_with_object({}) { |(key, count), scaled| scaled[key] = count * factor }
end
|
|
325
|
+
|
|
326
|
+
# Returns copies of the components with each count multiplied by `factor`.
#
# @param components [Array<Component>]
# @param factor [Numeric]
# @return [Array<Component>]
def scale_components(components, factor)
  components.map do |component|
    scaled_count = component.count * factor
    component_with_count(component, scaled_count)
  end
end
|
|
329
|
+
|
|
330
|
+
# Builds a new Component identical to `component` except for its count.
#
# @param component [Component]
# @param count [Numeric]
# @return [Component]
def component_with_count(component, count)
  Component.new(**component.to_h.merge(count: count))
end
|
|
339
|
+
|
|
340
|
+
# Raises a ParseError whose message is MSG_PARSE_INVALID_CHAR formatted
# with the offending text.
#
# @param char [String] the character (or token) to report
# @raise [ParseError] always
def raise_invalid(char)
  message = format(MSG_PARSE_INVALID_CHAR, char)
  raise ParseError, message
end
|
|
343
|
+
|
|
344
|
+
# @return [String, nil] the character under the cursor, or nil past the end
def current_char
  @body.slice(@index)
end
|
|
347
|
+
|
|
348
|
+
# @return [String, nil] the character right after the cursor, or nil when
#   there is none
def next_char
  @body[@index.succ]
end
|
|
351
|
+
|
|
352
|
+
# Moves the cursor forward by one character.
#
# @return [Integer] the new cursor position
def advance
  @index = @index.succ
end
|
|
355
|
+
|
|
356
|
+
# @return [Boolean] true once the cursor has reached or passed the end of
#   the current segment
def at_end?
  @body.length <= @index
end
|
|
359
|
+
end
|
|
360
|
+
end
|
|
361
|
+
end
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "elements"
|
|
4
|
+
require_relative "errors"
|
|
5
|
+
require_relative "isotopes"
|
|
6
|
+
|
|
7
|
+
module Seimi
|
|
8
|
+
# A parsed chemical formula: the original text, element tallies, the list
# of components with their masses, and the net charge.
class Formula
  # One distinct symbol in a formula together with its count and the mass
  # of a single atom.
  Component = Struct.new(:symbol, :element, :balance_symbol, :count, :atomic_mass, keyword_init: true) do
    # @return [Numeric] atomic mass multiplied by the count
    def total_mass
      atomic_mass * count
    end
  end

  attr_reader :composition, :balance_composition, :charge

  class << self
    # Parses `source` with Formula::Parser.
    #
    # @return [Formula]
    def parse(source)
      Parser.new(source).parse
    end
  end

  # @param source [String] formula text, returned by #to_s
  # @param composition [Hash] exposed through #composition
  # @param balance_composition [Hash] exposed through #balance_composition
  # @param components [Array<Component>] used for masses and the breakdown
  # @param charge [Integer] net charge
  def initialize(source, composition, balance_composition, components, charge)
    @source = source
    @composition = composition
    @balance_composition = balance_composition
    @components = components
    @charge = charge
  end

  # @return [Float] sum of the total masses of all components
  #   (0.0 when there are none)
  def molar_mass
    @components.sum(0.0, &:total_mass)
  end

  # @return [Array<Array>] one [symbol, count, total_mass] row per component
  def breakdown
    @components.map do |component|
      row = []
      row << component.symbol << component.count << component.total_mass
      row
    end
  end

  # @return [Boolean] true when the charge is not zero
  def charged?
    !charge.zero?
  end

  # @return [String] the source text this formula was built from
  def to_s
    @source
  end

  # Renders the mass breakdown as multi-line Japanese text: a header, one
  # line per component, the charge (only when charged), the total, a Sangi
  # rendering of the rounded molar mass, and a closing line.
  #
  # Kanji and Sangi are loaded on first use.
  #
  # @return [String] lines joined with "\n"
  def to_kobun
    require_relative "kanji"
    require_relative "sangi"

    lines = ["〔#{self} 解剖の覚〕"]
    component_lines = @components.map do |component|
      name = ELEMENTS.fetch(component.element).last
      "一、#{name}(#{component.symbol}) #{Kanji.decimal(component.total_mass, 3)} 匁掛ける#{Kanji.from_i(component.count)}つ"
    end
    lines.concat(component_lines)
    lines << "電荷 #{Kanji.rational(charge)} に候" if charged?
    lines.push(
      "〆て 分子の量 凡そ#{Kanji.decimal(molar_mass, 3)} に候",
      Sangi.render(molar_mass.round),
      "右の如く相違なく候 也"
    )
    lines.join("\n")
  end
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
require_relative "formula/parser"
|