hwp_script_to_latex 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8a1f7260a320ad6b3be51ded4f0a61b2da7d0143ed340715e37a0ebdcf89f2e1
4
- data.tar.gz: 045d8409baaeff28e2803f13f7c5fe152e8f7100749d40a253fef6f2f21f6467
3
+ metadata.gz: 5e948d02ae83f17c99fa96f38d16c3d0b257e7aa0f7ba4cea964fc300bb24975
4
+ data.tar.gz: '0825cfac358661df69d6cfa0f487a89428129ffa60c1af6f8e4b182162ea5e8f'
5
5
  SHA512:
6
- metadata.gz: 020d0e2857c38dbf1aa4ae5051150631bcb4033f075fef06c2229923fce601ca74c9d3058ccaa5e106b3eb2c923528af7b3993802681997a4869d29087333de5
7
- data.tar.gz: 5addae0db39cea048c66807b8f1141a31c8206f63dd3788a995bedb3bfb59331096463924eca23f3e2c1c8ba934fab1df9cb007fa5457c185c9e7f0369627b07
6
+ metadata.gz: 35732e0afd39f4818ce2aab81458fa7366202631bea5800391df1954ba0f850d7631f57276273126ef6b293269d44239ffde63e155804629f100962b21ab7432
7
+ data.tar.gz: f7647a86a3de1f04b9e503b0b34300824c29f2ce535c7b109aec3bfe1600ff37fa9edb7a504f96d11cda322697b9a9a0249587d4ff459d0b51c1825e045f3adc
@@ -1,33 +1,34 @@
1
1
  require 'json'
2
+ require 'hwp_script_to_latex/syntax'
3
+ require 'hwp_script_to_latex/processor'
2
4
 
3
5
  module HwpScriptToLatex
4
6
  # 한글 수식스크립트를 LaTeX 문법으로 변환
5
7
  class Converter
8
+ include Syntax
9
+
6
10
  # 명령어에 사용되는 좌, 우항 정규표현식
7
11
  LEFT_TERM_REGEX = "(?:[^{}\\s`]{9})*?\\K(?<%s>{(?>[^{}]+|(?:\\g<%s>))*}|(?:[^{}\\s`]{0,9}))"
8
12
  RIGHT_TERM_REGEX = "(?<%s>{(?>[^{}]+|(?:\\g<%s>))*}|(?:[^{}\\s`]{0,9}))"
9
13
 
10
14
  def initialize
11
- f = File.open(File.join(File.dirname(__FILE__), '..', '..', 'rules.json'), 'r')
12
- file_content = f.read
13
- f.close
14
-
15
- @rules= JSON.parse(file_content, symbolize_names: true)
16
- @simple_commands = @rules[:command][:simple].sort_by { |hash| -1 * hash[:script].length }
17
- @block_commands = @rules[:command][:block]
18
- @meta_keywords = @rules[:keyword][:meta].sort_by { |hash| -1 * hash[:regex].inspect.length }
19
- @command_keywords = @rules[:keyword][:command].sort_by { |hash| -1 * hash[:regex].inspect.length }
20
- @symbol_keywords = @rules[:keyword][:symbol].sort_by { |hash| -1 * hash[:regex].inspect.length }
21
- @reserved_keywords = @rules[:keyword][:reserved].sort_by { |hash| -1 * hash[:regex].inspect.length }
15
+ @processor = Processor.new
16
+ @keyword_commands = KEYWORD_COMMANDS
17
+ @default_commands = DEFAULT_COMMANDS
18
+ @block_commands = BLOCK_COMMANDS
19
+
20
+ @meta = META.sort_by { |hash| -1 * hash[:regex].inspect.length }
21
+ @symbols = SYMBOL.sort_by { |hash| -1 * hash[:regex].inspect.length }
22
+ @reserved_words = RESERVED_WORD.sort_by { |hash| -1 * hash[:regex].inspect.length }
22
23
  end
23
24
 
24
25
  def convert(script, math_mode: false, display_mode: false)
25
- # Sanitize for starting convert
26
- result = pre_sanitize(script)
27
- # 단순 치환 키워드
28
- result = replace_keywords(result)
29
- # 1개의 우항을 가지는 간단한 명령어
30
- result = replace_simple_commands(result)
26
+ # Data pre processing
27
+ result = @processor.pre_process(script)
28
+ # 파라미터가 없는 명령어
29
+ result = replace_keyword_commands(result)
30
+ # 1개의 우항을 가지는 명령어
31
+ result = replace_default_commands(result)
31
32
  # 행렬, 케이스등 블록 명령어
32
33
  # cases {...} => \begin{cases}...\end{cases}
33
34
  # dmatrix {...} => \begin{vmatrix}...\end{vmatrix}
@@ -45,11 +46,14 @@ module HwpScriptToLatex
45
46
  result = replace_sqrt(result) # Case 1
46
47
  result = replace_fractions(result) # Case 2
47
48
 
49
+ # 단순 치환 키워드
50
+ result = replace_keywords(result)
51
+
48
52
  # 전체 수식에 디스플레이 스타일 적용
49
53
  result = decorate_displaystyle(result) if display_mode
50
54
 
51
- # Sanitize result for removing dirty spaces
52
- result = post_sanitize(result)
55
+ # Data post processing
56
+ result = @processor.post_process(result)
53
57
 
54
58
  # Math mode
55
59
  result = "$#{result}$" if math_mode
@@ -59,37 +63,40 @@ module HwpScriptToLatex
59
63
 
60
64
  private
61
65
 
62
- # 변환 전 sanitize
63
- def pre_sanitize(script)
64
- # 족보닷컴 텍스트 제거
65
- jokbo_regex = %r(from\s*=+\s*(?:족보닷컴[\s\S]*?)=+)
66
- script = script.gsub(jokbo_regex, "")
67
- # 2개 이상의 공백을 하나의 공백으로 치환
68
- script = script.gsub(/\s+/, " ").strip
69
- # 백슬래시(로만체)를 삭제
70
- script = script.gsub(/\\/, "")
71
- # 꺽쇠 치환
72
- script = script.gsub(/&lt;/, "<")
73
- script = script.gsub(/&gt;/, ">")
74
- # 위, 아래 첨자 명령어로 변경
75
- script = script.gsub(/_/, " sub ")
76
- script = script.gsub(/\^/, " sup ")
66
# Replaces every parameterless keyword command in +script+ with its LaTeX
# form, using the rules stored in @keyword_commands.
#
# Example input: 1star(2star(3starcdotsstar(99star100))cdots)
# In a case like this, a single pass is not enough: when matching starts
# with "cdots", the neighbouring "star" prevents it from being converted
# correctly. The pass over all rules (loop 1) is therefore repeated until a
# whole pass changes nothing, and each rule is itself re-applied until it
# stops changing the script (loop 2). This is slower than a single pass but
# is kept until a better alternative is found.
#
# @param script [String] script text to convert
# @return [String] the script with all keyword commands replaced
def replace_keyword_commands(script)
  # A rule's pattern does not depend on the script, so build it once here
  # rather than once per pass (assumes rule_regex is a pure function of the
  # rule -- confirm).
  rules = @keyword_commands.map { |command| [rule_regex(command), command[:latex]] }

  loop do # loop 1
    changed = false

    rules.each do |pattern, latex|
      loop do # loop 2
        replaced = script.gsub(pattern, latex)
        break if replaced == script

        changed = true
        script = replaced
      end
    end

    break unless changed
  end

  script
end
80
93
 
81
- # 변환 후 sanitize
82
- def post_sanitize(script)
83
- # 2개 이상의 공백을 하나의 공백으로 치환
84
- script = script.gsub(/\s+/, " ").strip
85
- end
86
-
87
- def replace_simple_commands(script)
94
+ def replace_default_commands(script)
88
95
  right_term_group_name = "rt"
89
96
 
90
- @simple_commands.each do |command|
97
+ @default_commands.each do |command|
91
98
  right_term_regex = RIGHT_TERM_REGEX % [right_term_group_name, right_term_group_name]
92
- command_regex = %r((?<![a-zA-Z\\])(?i:#{command[:script]})\s*#{right_term_regex})
99
+ command_regex = %r((?<![a-zA-Z\\])(?i:#{command[:regex]})\s*#{right_term_regex})
93
100
 
94
101
  match_data = script.match(command_regex)
95
102
  while match_data
@@ -106,7 +113,7 @@ module HwpScriptToLatex
106
113
 
107
114
  def replace_block_commands(script)
108
115
  @block_commands.each do |command|
109
- command_regex = %r((?<![a-zA-Z])(?i:#{command[:script]})\s*(?<block_content>{(?>[^{}]+|(?:\g<block_content>))*}))
116
+ command_regex = %r((?<![a-zA-Z])(?i:#{command[:regex]})\s*(?<block_content>{(?>[^{}]+|(?:\g<block_content>))*}))
110
117
 
111
118
  match_data = script.match(command_regex)
112
119
  while match_data
@@ -123,7 +130,7 @@ module HwpScriptToLatex
123
130
  end
124
131
 
125
132
  def replace_keywords(script)
126
- keywords = @meta_keywords + @command_keywords + @symbol_keywords + @reserved_keywords
133
+ keywords = @meta + @symbols + @reserved_words
127
134
  matched_count = 0
128
135
 
129
136
  # 1star(2star(3starcdotsstar(99star100))cdots)
@@ -135,7 +142,7 @@ module HwpScriptToLatex
135
142
  loop do # loop 1
136
143
  matched_count = 0
137
144
  keywords.each do |keyword|
138
- keyword_regex = get_keyword_regex(keyword)
145
+ keyword_regex = rule_regex(keyword)
139
146
  before_script = script
140
147
  script = script.gsub(keyword_regex, keyword[:latex])
141
148
  while before_script != script # loop 2
@@ -201,22 +208,6 @@ module HwpScriptToLatex
201
208
  return script
202
209
  end
203
210
 
204
- def get_keyword_regex(keyword)
205
- regexes = []
206
- if keyword[:regex].class == Array
207
- regexes = keyword[:regex]
208
- else
209
- regexes << keyword[:regex]
210
- end
211
-
212
- is_alphabetic = keyword[:alphabetic].nil? ? true : keyword[:alphabetic]
213
- if is_alphabetic
214
- return %r((?<![a-zA-Z\\])(#{regexes.join('|')}))
215
- else
216
- return %r((?<![\\])(#{regexes.join('|')}))
217
- end
218
- end
219
-
220
211
  def decorate_displaystyle(script)
221
212
  script = script.gsub(/\\sum/, "\\displaystyle \\sum")
222
213
  script = script.gsub(/\\int/, "\\displaystyle \\int")
@@ -0,0 +1,34 @@
1
+ #
2
+ # Author: osh
3
+ # Created: 2019-05-27
4
+ # Last modified: 2019-05-27
5
+
6
module HwpScriptToLatex
  # Normalizes equation script text before and after it is converted.
  class Processor
    # Matches the Jokbo.com text block: "from", a run of "=", the site name,
    # any text, and a closing run of "=".
    JOKBO_TEXT = /from\s*=+\s*(?:족보닷컴[\s\S]*?)=+/.freeze

    # Matches a backslash at the start of the string or right after a space.
    # \A is used because whitespace has already been collapsed to single
    # spaces when this is applied, so there are no inner line starts to match.
    ROMAN_BACKSLASH = /\A\\| \\/.freeze

    # Any run of whitespace (spaces, tabs, newlines).
    WHITESPACE_RUN = /\s+/.freeze

    # Pre-processes an equation script string.
    #
    # Steps, in order: remove the Jokbo.com text, collapse whitespace runs to
    # one space and strip the ends, turn a leading/space-preceded backslash
    # (roman type) into the " \rm " command, decode &lt; / &gt;, and rewrite
    # "_" / "^" as the " sub " / " sup " commands.
    #
    # @param script [String] raw script text
    # @return [String] the normalized script
    def pre_process(script)
      script
        .gsub(JOKBO_TEXT, "")
        .gsub(WHITESPACE_RUN, " ").strip
        # Block form inserts the text verbatim instead of relying on how
        # gsub treats backslashes inside a replacement string.
        .gsub(ROMAN_BACKSLASH) { " \\rm " }
        .gsub("&lt;", "<")
        .gsub("&gt;", ">")
        .gsub("_", " sub ")
        .gsub("^", " sup ")
    end

    # Post-processes a converted string: collapses every whitespace run to a
    # single space and strips leading/trailing whitespace.
    #
    # @param script [String] converted text
    # @return [String] the tidied text
    def post_process(script)
      script.gsub(WHITESPACE_RUN, " ").strip
    end
  end
end