hwp_script_to_latex 1.4.0 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8a1f7260a320ad6b3be51ded4f0a61b2da7d0143ed340715e37a0ebdcf89f2e1
4
- data.tar.gz: 045d8409baaeff28e2803f13f7c5fe152e8f7100749d40a253fef6f2f21f6467
3
+ metadata.gz: 5e948d02ae83f17c99fa96f38d16c3d0b257e7aa0f7ba4cea964fc300bb24975
4
+ data.tar.gz: '0825cfac358661df69d6cfa0f487a89428129ffa60c1af6f8e4b182162ea5e8f'
5
5
  SHA512:
6
- metadata.gz: 020d0e2857c38dbf1aa4ae5051150631bcb4033f075fef06c2229923fce601ca74c9d3058ccaa5e106b3eb2c923528af7b3993802681997a4869d29087333de5
7
- data.tar.gz: 5addae0db39cea048c66807b8f1141a31c8206f63dd3788a995bedb3bfb59331096463924eca23f3e2c1c8ba934fab1df9cb007fa5457c185c9e7f0369627b07
6
+ metadata.gz: 35732e0afd39f4818ce2aab81458fa7366202631bea5800391df1954ba0f850d7631f57276273126ef6b293269d44239ffde63e155804629f100962b21ab7432
7
+ data.tar.gz: f7647a86a3de1f04b9e503b0b34300824c29f2ce535c7b109aec3bfe1600ff37fa9edb7a504f96d11cda322697b9a9a0249587d4ff459d0b51c1825e045f3adc
@@ -1,33 +1,34 @@
1
1
  require 'json'
2
+ require 'hwp_script_to_latex/syntax'
3
+ require 'hwp_script_to_latex/processor'
2
4
 
3
5
  module HwpScriptToLatex
4
6
  # 한글 수식스크립트를 LaTeX 문법으로 변환
5
7
  class Converter
8
+ include Syntax
9
+
6
10
  # 명령어에 사용되는 좌, 우항 정규표현식
7
11
  LEFT_TERM_REGEX = "(?:[^{}\\s`]{9})*?\\K(?<%s>{(?>[^{}]+|(?:\\g<%s>))*}|(?:[^{}\\s`]{0,9}))"
8
12
  RIGHT_TERM_REGEX = "(?<%s>{(?>[^{}]+|(?:\\g<%s>))*}|(?:[^{}\\s`]{0,9}))"
9
13
 
10
14
  def initialize
11
- f = File.open(File.join(File.dirname(__FILE__), '..', '..', 'rules.json'), 'r')
12
- file_content = f.read
13
- f.close
14
-
15
- @rules= JSON.parse(file_content, symbolize_names: true)
16
- @simple_commands = @rules[:command][:simple].sort_by { |hash| -1 * hash[:script].length }
17
- @block_commands = @rules[:command][:block]
18
- @meta_keywords = @rules[:keyword][:meta].sort_by { |hash| -1 * hash[:regex].inspect.length }
19
- @command_keywords = @rules[:keyword][:command].sort_by { |hash| -1 * hash[:regex].inspect.length }
20
- @symbol_keywords = @rules[:keyword][:symbol].sort_by { |hash| -1 * hash[:regex].inspect.length }
21
- @reserved_keywords = @rules[:keyword][:reserved].sort_by { |hash| -1 * hash[:regex].inspect.length }
15
+ @processor = Processor.new
16
+ @keyword_commands = KEYWORD_COMMANDS
17
+ @default_commands = DEFAULT_COMMANDS
18
+ @block_commands = BLOCK_COMMANDS
19
+
20
+ @meta = META.sort_by { |hash| -1 * hash[:regex].inspect.length }
21
+ @symbols = SYMBOL.sort_by { |hash| -1 * hash[:regex].inspect.length }
22
+ @reserved_words = RESERVED_WORD.sort_by { |hash| -1 * hash[:regex].inspect.length }
22
23
  end
23
24
 
24
25
  def convert(script, math_mode: false, display_mode: false)
25
- # Sanitize for starting convert
26
- result = pre_sanitize(script)
27
- # 단순 치환 키워드
28
- result = replace_keywords(result)
29
- # 1개의 우항을 가지는 간단한 명령어
30
- result = replace_simple_commands(result)
26
+ # Data pre processing
27
+ result = @processor.pre_process(script)
28
+ # 파라미터가 없는 명령어
29
+ result = replace_keyword_commands(result)
30
+ # 1개의 우항을 가지는 명령어
31
+ result = replace_default_commands(result)
31
32
  # 행렬, 케이스등 블록 명령어
32
33
  # cases {...} => \begin{cases}...\end{cases}
33
34
  # dmatrix {...} => \begin{vmatrix}...\end{vmatrix}
@@ -45,11 +46,14 @@ module HwpScriptToLatex
45
46
  result = replace_sqrt(result) # Case 1
46
47
  result = replace_fractions(result) # Case 2
47
48
 
49
+ # 단순 치환 키워드
50
+ result = replace_keywords(result)
51
+
48
52
  # 전체 수식에 디스플레이 스타일 적용
49
53
  result = decorate_displaystyle(result) if display_mode
50
54
 
51
- # Sanitize result for removing dirty spaces
52
- result = post_sanitize(result)
55
+ # Data post processing
56
+ result = @processor.post_process(result)
53
57
 
54
58
  # Math mode
55
59
  result = "$#{result}$" if math_mode
@@ -59,37 +63,40 @@ module HwpScriptToLatex
59
63
 
60
64
  private
61
65
 
62
- # 변환 전 sanitize
63
- def pre_sanitize(script)
64
- # 족보닷컴 텍스트 제거
65
- jokbo_regex = %r(from\s*=+\s*(?:족보닷컴[\s\S]*?)=+)
66
- script = script.gsub(jokbo_regex, "")
67
- # 2개 이상의 공백을 하나의 공백으로 치환
68
- script = script.gsub(/\s+/, " ").strip
69
- # 백슬래시(로만체)를 삭제
70
- script = script.gsub(/\\/, "")
71
- # 꺽쇠 치환
72
- script = script.gsub(/&lt;/, "<")
73
- script = script.gsub(/&gt;/, ">")
74
- # 위, 아래 첨자 명령어로 변경
75
- script = script.gsub(/_/, " sub ")
76
- script = script.gsub(/\^/, " sup ")
66
+ def replace_keyword_commands(script)
67
+ commands = @keyword_commands
68
+ matched_count = 0
69
+
70
+ # Example input: 1star(2star(3starcdotsstar(99star100))cdots)
71
+ # For input like the above, if matching starts at cdots, the star keyword
72
+ # keeps the script from being converted correctly.
73
+ # So the inner loop (loop 2) re-applies one command's gsub until the script stops changing,
74
+ # and the outer loop (loop 1) repeats the pass over every command until a pass counts no such change.
75
+ # This is slower, but it is kept as-is until a better alternative is found.
76
+ loop do # loop 1
77
+ matched_count = 0
78
+ commands.each do |command|
79
+ command_regex = rule_regex(command)
80
+ before_script = script
81
+ script = script.gsub(command_regex, command[:latex])
82
+ while before_script != script # loop 2
83
+ matched_count += 1
84
+ before_script = script
85
+ script = script.gsub(command_regex, command[:latex])
86
+ end
87
+ end
88
+ break if matched_count == 0
89
+ end
77
90
 
78
91
  return script
79
92
  end
80
93
 
81
- # 변환 후 sanitize
82
- def post_sanitize(script)
83
- # 2개 이상의 공백을 하나의 공백으로 치환
84
- script = script.gsub(/\s+/, " ").strip
85
- end
86
-
87
- def replace_simple_commands(script)
94
+ def replace_default_commands(script)
88
95
  right_term_group_name = "rt"
89
96
 
90
- @simple_commands.each do |command|
97
+ @default_commands.each do |command|
91
98
  right_term_regex = RIGHT_TERM_REGEX % [right_term_group_name, right_term_group_name]
92
- command_regex = %r((?<![a-zA-Z\\])(?i:#{command[:script]})\s*#{right_term_regex})
99
+ command_regex = %r((?<![a-zA-Z\\])(?i:#{command[:regex]})\s*#{right_term_regex})
93
100
 
94
101
  match_data = script.match(command_regex)
95
102
  while match_data
@@ -106,7 +113,7 @@ module HwpScriptToLatex
106
113
 
107
114
  def replace_block_commands(script)
108
115
  @block_commands.each do |command|
109
- command_regex = %r((?<![a-zA-Z])(?i:#{command[:script]})\s*(?<block_content>{(?>[^{}]+|(?:\g<block_content>))*}))
116
+ command_regex = %r((?<![a-zA-Z])(?i:#{command[:regex]})\s*(?<block_content>{(?>[^{}]+|(?:\g<block_content>))*}))
110
117
 
111
118
  match_data = script.match(command_regex)
112
119
  while match_data
@@ -123,7 +130,7 @@ module HwpScriptToLatex
123
130
  end
124
131
 
125
132
  def replace_keywords(script)
126
- keywords = @meta_keywords + @command_keywords + @symbol_keywords + @reserved_keywords
133
+ keywords = @meta + @symbols + @reserved_words
127
134
  matched_count = 0
128
135
 
129
136
  # 1star(2star(3starcdotsstar(99star100))cdots)
@@ -135,7 +142,7 @@ module HwpScriptToLatex
135
142
  loop do # loop 1
136
143
  matched_count = 0
137
144
  keywords.each do |keyword|
138
- keyword_regex = get_keyword_regex(keyword)
145
+ keyword_regex = rule_regex(keyword)
139
146
  before_script = script
140
147
  script = script.gsub(keyword_regex, keyword[:latex])
141
148
  while before_script != script # loop 2
@@ -201,22 +208,6 @@ module HwpScriptToLatex
201
208
  return script
202
209
  end
203
210
 
204
- def get_keyword_regex(keyword)
205
- regexes = []
206
- if keyword[:regex].class == Array
207
- regexes = keyword[:regex]
208
- else
209
- regexes << keyword[:regex]
210
- end
211
-
212
- is_alphabetic = keyword[:alphabetic].nil? ? true : keyword[:alphabetic]
213
- if is_alphabetic
214
- return %r((?<![a-zA-Z\\])(#{regexes.join('|')}))
215
- else
216
- return %r((?<![\\])(#{regexes.join('|')}))
217
- end
218
- end
219
-
220
211
  def decorate_displaystyle(script)
221
212
  script = script.gsub(/\\sum/, "\\displaystyle \\sum")
222
213
  script = script.gsub(/\\int/, "\\displaystyle \\int")
@@ -0,0 +1,34 @@
1
+ #
2
+ # Author: osh
3
+ # Created: 2019-05-27
4
+ # Last modified: 2019-05-27
5
+
6
+ module HwpScriptToLatex
7
+ class Processor
8
+
9
+ # Pre-process the equation script string before conversion; returns the cleaned string.
10
+ def pre_process(script)
11
+ # Remove the Jokbo.com text span matched by jokbo_regex
12
+ jokbo_regex = %r(from\s*=+\s*(?:족보닷컴[\s\S]*?)=+)
13
+ script = script.gsub(jokbo_regex, "")
14
+ # Collapse each run of whitespace into a single space and strip both ends
15
+ script = script.gsub(/\s+/, " ").strip
16
+ # Turn a backslash (roman style) at a line start or after a space into the rm (roman) command
17
+ script = script.gsub(/^\\| \\/, " \\rm ")
18
+ # Replace the HTML entities for angle brackets with literal < and >
19
+ script = script.gsub(/&lt;/, "<")
20
+ script = script.gsub(/&gt;/, ">")
21
+ # Rewrite _ and ^ as the " sub " and " sup " (subscript / superscript) commands
22
+ script = script.gsub(/_/, " sub ")
23
+ script = script.gsub(/\^/, " sup ")
24
+
25
+ return script
26
+ end
27
+
28
+ # Post-process the equation string after conversion; returns the cleaned string.
29
+ def post_process(script)
30
+ # Collapse each run of whitespace into a single space and strip both ends
31
+ script = script.gsub(/\s+/, " ").strip
32
+ end
33
+ end
34
+ end