hwp_script_to_latex 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/hwp_script_to_latex/converter.rb +53 -62
- data/lib/hwp_script_to_latex/processor.rb +34 -0
- data/lib/hwp_script_to_latex/syntax.rb +1326 -0
- data/lib/hwp_script_to_latex/validator.rb +84 -0
- data/lib/hwp_script_to_latex/version.rb +1 -1
- metadata +5 -3
- data/rules.json +0 -1277
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5e948d02ae83f17c99fa96f38d16c3d0b257e7aa0f7ba4cea964fc300bb24975
|
4
|
+
data.tar.gz: '0825cfac358661df69d6cfa0f487a89428129ffa60c1af6f8e4b182162ea5e8f'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 35732e0afd39f4818ce2aab81458fa7366202631bea5800391df1954ba0f850d7631f57276273126ef6b293269d44239ffde63e155804629f100962b21ab7432
|
7
|
+
data.tar.gz: f7647a86a3de1f04b9e503b0b34300824c29f2ce535c7b109aec3bfe1600ff37fa9edb7a504f96d11cda322697b9a9a0249587d4ff459d0b51c1825e045f3adc
|
@@ -1,33 +1,34 @@
|
|
1
1
|
require 'json'
|
2
|
+
require 'hwp_script_to_latex/syntax'
|
3
|
+
require 'hwp_script_to_latex/processor'
|
2
4
|
|
3
5
|
module HwpScriptToLatex
|
4
6
|
# 한글 수식스크립트를 LaTeX 문법으로 변환
|
5
7
|
class Converter
|
8
|
+
include Syntax
|
9
|
+
|
6
10
|
# 명령어에 사용되는 좌, 우항 정규표현식
|
7
11
|
LEFT_TERM_REGEX = "(?:[^{}\\s`]{9})*?\\K(?<%s>{(?>[^{}]+|(?:\\g<%s>))*}|(?:[^{}\\s`]{0,9}))"
|
8
12
|
RIGHT_TERM_REGEX = "(?<%s>{(?>[^{}]+|(?:\\g<%s>))*}|(?:[^{}\\s`]{0,9}))"
|
9
13
|
|
10
14
|
def initialize
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
@
|
17
|
-
@
|
18
|
-
@
|
19
|
-
@command_keywords = @rules[:keyword][:command].sort_by { |hash| -1 * hash[:regex].inspect.length }
|
20
|
-
@symbol_keywords = @rules[:keyword][:symbol].sort_by { |hash| -1 * hash[:regex].inspect.length }
|
21
|
-
@reserved_keywords = @rules[:keyword][:reserved].sort_by { |hash| -1 * hash[:regex].inspect.length }
|
15
|
+
@processor = Processor.new
|
16
|
+
@keyword_commands = KEYWORD_COMMANDS
|
17
|
+
@default_commands = DEFAULT_COMMANDS
|
18
|
+
@block_commands = BLOCK_COMMANDS
|
19
|
+
|
20
|
+
@meta = META.sort_by { |hash| -1 * hash[:regex].inspect.length }
|
21
|
+
@symbols = SYMBOL.sort_by { |hash| -1 * hash[:regex].inspect.length }
|
22
|
+
@reserved_words = RESERVED_WORD.sort_by { |hash| -1 * hash[:regex].inspect.length }
|
22
23
|
end
|
23
24
|
|
24
25
|
def convert(script, math_mode: false, display_mode: false)
|
25
|
-
#
|
26
|
-
result =
|
27
|
-
#
|
28
|
-
result =
|
29
|
-
# 1개의 우항을 가지는
|
30
|
-
result =
|
26
|
+
# Data pre processing
|
27
|
+
result = @processor.pre_process(script)
|
28
|
+
# 파라미터가 없는 명령어
|
29
|
+
result = replace_keyword_commands(result)
|
30
|
+
# 1개의 우항을 가지는 명령어
|
31
|
+
result = replace_default_commands(result)
|
31
32
|
# 행렬, 케이스등 블록 명령어
|
32
33
|
# cases {...} => \begin{cases}...\end{cases}
|
33
34
|
# dmatrix {...} => \begin{vmatrix}...\end{vmatrix}
|
@@ -45,11 +46,14 @@ module HwpScriptToLatex
|
|
45
46
|
result = replace_sqrt(result) # Case 1
|
46
47
|
result = replace_fractions(result) # Case 2
|
47
48
|
|
49
|
+
# 단순 치환 키워드
|
50
|
+
result = replace_keywords(result)
|
51
|
+
|
48
52
|
# 전체 수식에 디스플레이 스타일 적용
|
49
53
|
result = decorate_displaystyle(result) if display_mode
|
50
54
|
|
51
|
-
#
|
52
|
-
result =
|
55
|
+
# Data post processing
|
56
|
+
result = @processor.post_process(result)
|
53
57
|
|
54
58
|
# Math mode
|
55
59
|
result = "$#{result}$" if math_mode
|
@@ -59,37 +63,40 @@ module HwpScriptToLatex
|
|
59
63
|
|
60
64
|
private
|
61
65
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
#
|
68
|
-
|
69
|
-
#
|
70
|
-
|
71
|
-
#
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
66
|
+
# Replaces parameterless keyword commands in +script+ with their LaTeX text.
#
# One sweep over the command list is not sufficient. In an input such as
#   1star(2star(3starcdotsstar(99star100))cdots)
# keywords are run together, so replacing one keyword can expose another
# that did not match before. The whole list is therefore swept again and
# again until a complete sweep leaves the script unchanged. This is slower
# than a single pass but keeps the conversion correct.
#
# @param script [String] equation script to convert
# @return [String] script with every keyword command replaced
def replace_keyword_commands(script)
  # Build each command's regex once instead of on every sweep.
  # NOTE(review): assumes rule_regex is a pure function of the command
  # hash (it is defined outside this method) - confirm.
  rules = @keyword_commands.map { |command| [rule_regex(command), command[:latex]] }

  loop do # loop 1: repeat full sweeps until nothing changes
    changed = false

    rules.each do |regex, latex|
      loop do # loop 2: re-apply one command until it stops matching
        replaced = script.gsub(regex, latex)
        break if replaced == script

        script = replaced
        changed = true
      end
    end

    break unless changed
  end

  script
end
|
80
93
|
|
81
|
-
|
82
|
-
def post_sanitize(script)
|
83
|
-
# 2개 이상의 공백을 하나의 공백으로 치환
|
84
|
-
script = script.gsub(/\s+/, " ").strip
|
85
|
-
end
|
86
|
-
|
87
|
-
def replace_simple_commands(script)
|
94
|
+
def replace_default_commands(script)
|
88
95
|
right_term_group_name = "rt"
|
89
96
|
|
90
|
-
@
|
97
|
+
@default_commands.each do |command|
|
91
98
|
right_term_regex = RIGHT_TERM_REGEX % [right_term_group_name, right_term_group_name]
|
92
|
-
command_regex = %r((?<![a-zA-Z\\])(?i:#{command[:
|
99
|
+
command_regex = %r((?<![a-zA-Z\\])(?i:#{command[:regex]})\s*#{right_term_regex})
|
93
100
|
|
94
101
|
match_data = script.match(command_regex)
|
95
102
|
while match_data
|
@@ -106,7 +113,7 @@ module HwpScriptToLatex
|
|
106
113
|
|
107
114
|
def replace_block_commands(script)
|
108
115
|
@block_commands.each do |command|
|
109
|
-
command_regex = %r((?<![a-zA-Z])(?i:#{command[:
|
116
|
+
command_regex = %r((?<![a-zA-Z])(?i:#{command[:regex]})\s*(?<block_content>{(?>[^{}]+|(?:\g<block_content>))*}))
|
110
117
|
|
111
118
|
match_data = script.match(command_regex)
|
112
119
|
while match_data
|
@@ -123,7 +130,7 @@ module HwpScriptToLatex
|
|
123
130
|
end
|
124
131
|
|
125
132
|
def replace_keywords(script)
|
126
|
-
keywords = @
|
133
|
+
keywords = @meta + @symbols + @reserved_words
|
127
134
|
matched_count = 0
|
128
135
|
|
129
136
|
# 1star(2star(3starcdotsstar(99star100))cdots)
|
@@ -135,7 +142,7 @@ module HwpScriptToLatex
|
|
135
142
|
loop do # loop 1
|
136
143
|
matched_count = 0
|
137
144
|
keywords.each do |keyword|
|
138
|
-
keyword_regex =
|
145
|
+
keyword_regex = rule_regex(keyword)
|
139
146
|
before_script = script
|
140
147
|
script = script.gsub(keyword_regex, keyword[:latex])
|
141
148
|
while before_script != script # loop 2
|
@@ -201,22 +208,6 @@ module HwpScriptToLatex
|
|
201
208
|
return script
|
202
209
|
end
|
203
210
|
|
204
|
-
def get_keyword_regex(keyword)
|
205
|
-
regexes = []
|
206
|
-
if keyword[:regex].class == Array
|
207
|
-
regexes = keyword[:regex]
|
208
|
-
else
|
209
|
-
regexes << keyword[:regex]
|
210
|
-
end
|
211
|
-
|
212
|
-
is_alphabetic = keyword[:alphabetic].nil? ? true : keyword[:alphabetic]
|
213
|
-
if is_alphabetic
|
214
|
-
return %r((?<![a-zA-Z\\])(#{regexes.join('|')}))
|
215
|
-
else
|
216
|
-
return %r((?<![\\])(#{regexes.join('|')}))
|
217
|
-
end
|
218
|
-
end
|
219
|
-
|
220
211
|
def decorate_displaystyle(script)
|
221
212
|
script = script.gsub(/\\sum/, "\\displaystyle \\sum")
|
222
213
|
script = script.gsub(/\\int/, "\\displaystyle \\int")
|
@@ -0,0 +1,34 @@
|
|
1
|
+
#
|
2
|
+
# Author: osh
|
3
|
+
# Created: 2019-05-27
|
4
|
+
# Last modified: 2019-05-27
|
5
|
+
|
6
|
+
module HwpScriptToLatex
  # Text clean-up applied to an equation script before and after conversion.
  class Processor
    # Pre-processes an equation script string.
    #
    # Steps, in order: remove the "from ==== 족보닷컴 ... ====" text, collapse
    # whitespace runs to one space and trim, turn a backslash (roman style)
    # at the start or after a space into the " \rm " command, decode
    # HTML-escaped angle brackets, and rewrite "_" / "^" as the " sub " /
    # " sup " script commands.
    #
    # @param script [String] raw equation script
    # @return [String] normalised script
    def pre_process(script)
      # Remove the jokbo.com attribution text.
      jokbo_regex = %r(from\s*=+\s*(?:족보닷컴[\s\S]*?)=+)
      script = script.gsub(jokbo_regex, "")
      # Collapse two or more whitespace characters into a single space.
      script = script.gsub(/\s+/, " ").strip
      # Convert a backslash (roman style) into the roman-style command.
      script = script.gsub(/^\\| \\/, " \\rm ")
      # Decode escaped angle brackets. Matching a literal "<" / ">" and
      # replacing it with itself would be a no-op, so the entity forms are
      # what must be matched here.
      script = script.gsub(/&lt;/, "<")
      script = script.gsub(/&gt;/, ">")
      # Rewrite subscript / superscript markers as script commands.
      script = script.gsub(/_/, " sub ")
      script.gsub(/\^/, " sup ")
    end

    # Post-processes a converted string.
    #
    # @param script [String] converted output
    # @return [String] output with whitespace runs collapsed to one space
    #   and leading/trailing whitespace removed
    def post_process(script)
      script.gsub(/\s+/, " ").strip
    end
  end
end
|