hwp_script_to_latex 1.4.0 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/hwp_script_to_latex/converter.rb +53 -62
- data/lib/hwp_script_to_latex/processor.rb +34 -0
- data/lib/hwp_script_to_latex/syntax.rb +1326 -0
- data/lib/hwp_script_to_latex/validator.rb +84 -0
- data/lib/hwp_script_to_latex/version.rb +1 -1
- metadata +5 -3
- data/rules.json +0 -1277
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5e948d02ae83f17c99fa96f38d16c3d0b257e7aa0f7ba4cea964fc300bb24975
|
4
|
+
data.tar.gz: '0825cfac358661df69d6cfa0f487a89428129ffa60c1af6f8e4b182162ea5e8f'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 35732e0afd39f4818ce2aab81458fa7366202631bea5800391df1954ba0f850d7631f57276273126ef6b293269d44239ffde63e155804629f100962b21ab7432
|
7
|
+
data.tar.gz: f7647a86a3de1f04b9e503b0b34300824c29f2ce535c7b109aec3bfe1600ff37fa9edb7a504f96d11cda322697b9a9a0249587d4ff459d0b51c1825e045f3adc
|
data/lib/hwp_script_to_latex/converter.rb
CHANGED
@@ -1,33 +1,34 @@
|
|
1
1
|
require 'json'
|
2
|
+
require 'hwp_script_to_latex/syntax'
|
3
|
+
require 'hwp_script_to_latex/processor'
|
2
4
|
|
3
5
|
module HwpScriptToLatex
|
4
6
|
# 한글 수식스크립트를 LaTeX 문법으로 변환
|
5
7
|
class Converter
|
8
|
+
include Syntax
|
9
|
+
|
6
10
|
# 명령어에 사용되는 좌, 우항 정규표현식
|
7
11
|
LEFT_TERM_REGEX = "(?:[^{}\\s`]{9})*?\\K(?<%s>{(?>[^{}]+|(?:\\g<%s>))*}|(?:[^{}\\s`]{0,9}))"
|
8
12
|
RIGHT_TERM_REGEX = "(?<%s>{(?>[^{}]+|(?:\\g<%s>))*}|(?:[^{}\\s`]{0,9}))"
|
9
13
|
|
10
14
|
def initialize
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
@
|
17
|
-
@
|
18
|
-
@
|
19
|
-
@command_keywords = @rules[:keyword][:command].sort_by { |hash| -1 * hash[:regex].inspect.length }
|
20
|
-
@symbol_keywords = @rules[:keyword][:symbol].sort_by { |hash| -1 * hash[:regex].inspect.length }
|
21
|
-
@reserved_keywords = @rules[:keyword][:reserved].sort_by { |hash| -1 * hash[:regex].inspect.length }
|
15
|
+
@processor = Processor.new
|
16
|
+
@keyword_commands = KEYWORD_COMMANDS
|
17
|
+
@default_commands = DEFAULT_COMMANDS
|
18
|
+
@block_commands = BLOCK_COMMANDS
|
19
|
+
|
20
|
+
@meta = META.sort_by { |hash| -1 * hash[:regex].inspect.length }
|
21
|
+
@symbols = SYMBOL.sort_by { |hash| -1 * hash[:regex].inspect.length }
|
22
|
+
@reserved_words = RESERVED_WORD.sort_by { |hash| -1 * hash[:regex].inspect.length }
|
22
23
|
end
|
23
24
|
|
24
25
|
def convert(script, math_mode: false, display_mode: false)
|
25
|
-
#
|
26
|
-
result =
|
27
|
-
#
|
28
|
-
result =
|
29
|
-
# 1개의 우항을 가지는
|
30
|
-
result =
|
26
|
+
# Data pre processing
|
27
|
+
result = @processor.pre_process(script)
|
28
|
+
# 파라미터가 없는 명령어
|
29
|
+
result = replace_keyword_commands(result)
|
30
|
+
# 1개의 우항을 가지는 명령어
|
31
|
+
result = replace_default_commands(result)
|
31
32
|
# 행렬, 케이스등 블록 명령어
|
32
33
|
# cases {...} => \begin{cases}...\end{cases}
|
33
34
|
# dmatrix {...} => \begin{vmatrix}...\end{vmatrix}
|
@@ -45,11 +46,14 @@ module HwpScriptToLatex
|
|
45
46
|
result = replace_sqrt(result) # Case 1
|
46
47
|
result = replace_fractions(result) # Case 2
|
47
48
|
|
49
|
+
# 단순 치환 키워드
|
50
|
+
result = replace_keywords(result)
|
51
|
+
|
48
52
|
# 전체 수식에 디스플레이 스타일 적용
|
49
53
|
result = decorate_displaystyle(result) if display_mode
|
50
54
|
|
51
|
-
#
|
52
|
-
result =
|
55
|
+
# Data post processing
|
56
|
+
result = @processor.post_process(result)
|
53
57
|
|
54
58
|
# Math mode
|
55
59
|
result = "$#{result}$" if math_mode
|
@@ -59,37 +63,40 @@ module HwpScriptToLatex
|
|
59
63
|
|
60
64
|
private
|
61
65
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
#
|
68
|
-
|
69
|
-
#
|
70
|
-
|
71
|
-
#
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
66
|
+
def replace_keyword_commands(script)
|
67
|
+
commands = @keyword_commands
|
68
|
+
matched_count = 0
|
69
|
+
|
70
|
+
# 1star(2star(3starcdotsstar(99star100))cdots)
|
71
|
+
# 위와 같은 경우 이중 루프 cdots 부터 매치되기 시작하면 star 때문에
|
72
|
+
# 제대로 변환되지 않음.
|
73
|
+
# 그러므로 전체 키워드를 검사하는 루프(loop 2)를
|
74
|
+
# 이중 루프로 변환이 완료될 때 까지 반복함(loop 1)
|
75
|
+
# 속도는 느려지겠지만 대안을 찾을 때 까지 이대로 사용하려고 함
|
76
|
+
loop do # loop 1
|
77
|
+
matched_count = 0
|
78
|
+
commands.each do |command|
|
79
|
+
command_regex = rule_regex(command)
|
80
|
+
before_script = script
|
81
|
+
script = script.gsub(command_regex, command[:latex])
|
82
|
+
while before_script != script # loop 2
|
83
|
+
matched_count += 1
|
84
|
+
before_script = script
|
85
|
+
script = script.gsub(command_regex, command[:latex])
|
86
|
+
end
|
87
|
+
end
|
88
|
+
break if matched_count == 0
|
89
|
+
end
|
77
90
|
|
78
91
|
return script
|
79
92
|
end
|
80
93
|
|
81
|
-
|
82
|
-
def post_sanitize(script)
|
83
|
-
# 2개 이상의 공백을 하나의 공백으로 치환
|
84
|
-
script = script.gsub(/\s+/, " ").strip
|
85
|
-
end
|
86
|
-
|
87
|
-
def replace_simple_commands(script)
|
94
|
+
def replace_default_commands(script)
|
88
95
|
right_term_group_name = "rt"
|
89
96
|
|
90
|
-
@
|
97
|
+
@default_commands.each do |command|
|
91
98
|
right_term_regex = RIGHT_TERM_REGEX % [right_term_group_name, right_term_group_name]
|
92
|
-
command_regex = %r((?<![a-zA-Z\\])(?i:#{command[:
|
99
|
+
command_regex = %r((?<![a-zA-Z\\])(?i:#{command[:regex]})\s*#{right_term_regex})
|
93
100
|
|
94
101
|
match_data = script.match(command_regex)
|
95
102
|
while match_data
|
@@ -106,7 +113,7 @@ module HwpScriptToLatex
|
|
106
113
|
|
107
114
|
def replace_block_commands(script)
|
108
115
|
@block_commands.each do |command|
|
109
|
-
command_regex = %r((?<![a-zA-Z])(?i:#{command[:
|
116
|
+
command_regex = %r((?<![a-zA-Z])(?i:#{command[:regex]})\s*(?<block_content>{(?>[^{}]+|(?:\g<block_content>))*}))
|
110
117
|
|
111
118
|
match_data = script.match(command_regex)
|
112
119
|
while match_data
|
@@ -123,7 +130,7 @@ module HwpScriptToLatex
|
|
123
130
|
end
|
124
131
|
|
125
132
|
def replace_keywords(script)
|
126
|
-
keywords = @
|
133
|
+
keywords = @meta + @symbols + @reserved_words
|
127
134
|
matched_count = 0
|
128
135
|
|
129
136
|
# 1star(2star(3starcdotsstar(99star100))cdots)
|
@@ -135,7 +142,7 @@ module HwpScriptToLatex
|
|
135
142
|
loop do # loop 1
|
136
143
|
matched_count = 0
|
137
144
|
keywords.each do |keyword|
|
138
|
-
keyword_regex =
|
145
|
+
keyword_regex = rule_regex(keyword)
|
139
146
|
before_script = script
|
140
147
|
script = script.gsub(keyword_regex, keyword[:latex])
|
141
148
|
while before_script != script # loop 2
|
@@ -201,22 +208,6 @@ module HwpScriptToLatex
|
|
201
208
|
return script
|
202
209
|
end
|
203
210
|
|
204
|
-
def get_keyword_regex(keyword)
|
205
|
-
regexes = []
|
206
|
-
if keyword[:regex].class == Array
|
207
|
-
regexes = keyword[:regex]
|
208
|
-
else
|
209
|
-
regexes << keyword[:regex]
|
210
|
-
end
|
211
|
-
|
212
|
-
is_alphabetic = keyword[:alphabetic].nil? ? true : keyword[:alphabetic]
|
213
|
-
if is_alphabetic
|
214
|
-
return %r((?<![a-zA-Z\\])(#{regexes.join('|')}))
|
215
|
-
else
|
216
|
-
return %r((?<![\\])(#{regexes.join('|')}))
|
217
|
-
end
|
218
|
-
end
|
219
|
-
|
220
211
|
def decorate_displaystyle(script)
|
221
212
|
script = script.gsub(/\\sum/, "\\displaystyle \\sum")
|
222
213
|
script = script.gsub(/\\int/, "\\displaystyle \\int")
|
data/lib/hwp_script_to_latex/processor.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#
|
2
|
+
# Author: osh
|
3
|
+
# Created: 2019-05-27
|
4
|
+
# Last modified: 2019-05-27
|
5
|
+
|
6
|
+
module HwpScriptToLatex
|
7
|
+
class Processor
|
8
|
+
|
9
|
+
# 수식 문자열 전처리
|
10
|
+
def pre_process(script)
|
11
|
+
# 족보닷컴 텍스트 제거
|
12
|
+
jokbo_regex = %r(from\s*=+\s*(?:족보닷컴[\s\S]*?)=+)
|
13
|
+
script = script.gsub(jokbo_regex, "")
|
14
|
+
# 2개 이상의 공백을 하나의 공백으로 치환
|
15
|
+
script = script.gsub(/\s+/, " ").strip
|
16
|
+
# 백슬래시(로만체)를 로만체 명령어로 변환
|
17
|
+
script = script.gsub(/^\\| \\/, " \\rm ")
|
18
|
+
# 꺽쇠 치환
|
19
|
+
script = script.gsub(/&lt;/, "<")
|
20
|
+
script = script.gsub(/&gt;/, ">")
|
21
|
+
# 위, 아래 첨자 명령어로 변경
|
22
|
+
script = script.gsub(/_/, " sub ")
|
23
|
+
script = script.gsub(/\^/, " sup ")
|
24
|
+
|
25
|
+
return script
|
26
|
+
end
|
27
|
+
|
28
|
+
# 수식 문자열 후처리
|
29
|
+
def post_process(script)
|
30
|
+
# 2개 이상의 공백을 하나의 공백으로 치환
|
31
|
+
script = script.gsub(/\s+/, " ").strip
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|