hwp_script_to_latex 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/hwp_script_to_latex/converter.rb +53 -62
- data/lib/hwp_script_to_latex/processor.rb +34 -0
- data/lib/hwp_script_to_latex/syntax.rb +1326 -0
- data/lib/hwp_script_to_latex/validator.rb +84 -0
- data/lib/hwp_script_to_latex/version.rb +1 -1
- metadata +5 -3
- data/rules.json +0 -1277
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 5e948d02ae83f17c99fa96f38d16c3d0b257e7aa0f7ba4cea964fc300bb24975
         | 
| 4 | 
            +
              data.tar.gz: '0825cfac358661df69d6cfa0f487a89428129ffa60c1af6f8e4b182162ea5e8f'
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 35732e0afd39f4818ce2aab81458fa7366202631bea5800391df1954ba0f850d7631f57276273126ef6b293269d44239ffde63e155804629f100962b21ab7432
         | 
| 7 | 
            +
              data.tar.gz: f7647a86a3de1f04b9e503b0b34300824c29f2ce535c7b109aec3bfe1600ff37fa9edb7a504f96d11cda322697b9a9a0249587d4ff459d0b51c1825e045f3adc
         | 
| @@ -1,33 +1,34 @@ | |
| 1 1 | 
             
            require 'json'
         | 
| 2 | 
            +
            require 'hwp_script_to_latex/syntax'
         | 
| 3 | 
            +
            require 'hwp_script_to_latex/processor'
         | 
| 2 4 |  | 
| 3 5 | 
             
            module HwpScriptToLatex
         | 
| 4 6 | 
             
              # 한글 수식스크립트를 LaTeX 문법으로 변환
         | 
| 5 7 | 
             
              class Converter
         | 
| 8 | 
            +
                include Syntax
         | 
| 9 | 
            +
             | 
| 6 10 | 
             
                # 명령어에 사용되는 좌, 우항 정규표현식
         | 
| 7 11 | 
             
                LEFT_TERM_REGEX = "(?:[^{}\\s`]{9})*?\\K(?<%s>{(?>[^{}]+|(?:\\g<%s>))*}|(?:[^{}\\s`]{0,9}))"
         | 
| 8 12 | 
             
                RIGHT_TERM_REGEX = "(?<%s>{(?>[^{}]+|(?:\\g<%s>))*}|(?:[^{}\\s`]{0,9}))"
         | 
| 9 13 |  | 
| 10 14 | 
             
                def initialize
         | 
| 11 | 
            -
                   | 
| 12 | 
            -
                   | 
| 13 | 
            -
                   | 
| 14 | 
            -
             | 
| 15 | 
            -
             | 
| 16 | 
            -
                  @ | 
| 17 | 
            -
                  @ | 
| 18 | 
            -
                  @ | 
| 19 | 
            -
                  @command_keywords = @rules[:keyword][:command].sort_by { |hash| -1 * hash[:regex].inspect.length }
         | 
| 20 | 
            -
                  @symbol_keywords = @rules[:keyword][:symbol].sort_by { |hash| -1 * hash[:regex].inspect.length }
         | 
| 21 | 
            -
                  @reserved_keywords = @rules[:keyword][:reserved].sort_by { |hash| -1 * hash[:regex].inspect.length }
         | 
| 15 | 
            +
                  @processor = Processor.new
         | 
| 16 | 
            +
                  @keyword_commands = KEYWORD_COMMANDS
         | 
| 17 | 
            +
                  @default_commands = DEFAULT_COMMANDS
         | 
| 18 | 
            +
                  @block_commands = BLOCK_COMMANDS
         | 
| 19 | 
            +
             | 
| 20 | 
            +
                  @meta = META.sort_by { |hash| -1 * hash[:regex].inspect.length }
         | 
| 21 | 
            +
                  @symbols = SYMBOL.sort_by { |hash| -1 * hash[:regex].inspect.length }
         | 
| 22 | 
            +
                  @reserved_words = RESERVED_WORD.sort_by { |hash| -1 * hash[:regex].inspect.length }
         | 
| 22 23 | 
             
                end
         | 
| 23 24 |  | 
| 24 25 | 
             
                def convert(script, math_mode: false, display_mode: false)
         | 
| 25 | 
            -
                  #  | 
| 26 | 
            -
                  result =  | 
| 27 | 
            -
                  #  | 
| 28 | 
            -
                  result =  | 
| 29 | 
            -
                  # 1개의 우항을 가지는  | 
| 30 | 
            -
                  result =  | 
| 26 | 
            +
                  # Data pre processing
         | 
| 27 | 
            +
                  result = @processor.pre_process(script)
         | 
| 28 | 
            +
                  # 파라미터가 없는 명령어
         | 
| 29 | 
            +
                  result = replace_keyword_commands(result)
         | 
| 30 | 
            +
                  # 1개의 우항을 가지는 명령어
         | 
| 31 | 
            +
                  result = replace_default_commands(result)
         | 
| 31 32 | 
             
                  # 행렬, 케이스등 블록 명령어
         | 
| 32 33 | 
             
                  #   cases {...} => \begin{cases}...\end{cases}
         | 
| 33 34 | 
             
                  #   dmatrix {...} => \begin{vmatrix}...\end{vmatrix}
         | 
| @@ -45,11 +46,14 @@ module HwpScriptToLatex | |
| 45 46 | 
             
                  result = replace_sqrt(result) # Case 1
         | 
| 46 47 | 
             
                  result = replace_fractions(result) # Case 2
         | 
| 47 48 |  | 
| 49 | 
            +
                  # 단순 치환 키워드
         | 
| 50 | 
            +
                  result = replace_keywords(result)
         | 
| 51 | 
            +
             | 
| 48 52 | 
             
                  # 전체 수식에 디스플레이 스타일 적용
         | 
| 49 53 | 
             
                  result = decorate_displaystyle(result) if display_mode
         | 
| 50 54 |  | 
| 51 | 
            -
                  #  | 
| 52 | 
            -
                  result =  | 
| 55 | 
            +
                  # Data post processing
         | 
| 56 | 
            +
                  result = @processor.post_process(result)
         | 
| 53 57 |  | 
| 54 58 | 
             
                  # Math mode
         | 
| 55 59 | 
             
                  result = "$#{result}$" if math_mode
         | 
| @@ -59,37 +63,40 @@ module HwpScriptToLatex | |
| 59 63 |  | 
| 60 64 | 
             
                private
         | 
| 61 65 |  | 
| 62 | 
            -
                 | 
| 63 | 
            -
             | 
| 64 | 
            -
                   | 
| 65 | 
            -
                   | 
| 66 | 
            -
                   | 
| 67 | 
            -
                  #  | 
| 68 | 
            -
                   | 
| 69 | 
            -
                  #  | 
| 70 | 
            -
                   | 
| 71 | 
            -
                  #  | 
| 72 | 
            -
                   | 
| 73 | 
            -
             | 
| 74 | 
            -
             | 
| 75 | 
            -
             | 
| 76 | 
            -
             | 
| 66 | 
            +
                def replace_keyword_commands(script)
         | 
| 67 | 
            +
                  commands = @keyword_commands
         | 
| 68 | 
            +
                  matched_count = 0
         | 
| 69 | 
            +
                  
         | 
| 70 | 
            +
                  # 1star(2star(3starcdotsstar(99star100))cdots)
         | 
| 71 | 
            +
                  # 위와 같은 경우 이중 루프 cdots 부터 매치되기 시작하면 star 때문에
         | 
| 72 | 
            +
                  # 제대로 변환되지 않음.
         | 
| 73 | 
            +
                  # 그러므로 전체 키워드를 검사하는 루프(loop 2)를
         | 
| 74 | 
            +
                  # 이중 루프로 변환이 완료될 때 까지 반복함(loop 1)
         | 
| 75 | 
            +
                  # 속도는 느려지겠지만 대안을 찾을 때 까지 이대로 사용하려고 함
         | 
| 76 | 
            +
                  loop do # loop 1
         | 
| 77 | 
            +
                    matched_count = 0
         | 
| 78 | 
            +
                    commands.each do |command|
         | 
| 79 | 
            +
                      command_regex = rule_regex(command)
         | 
| 80 | 
            +
                      before_script = script
         | 
| 81 | 
            +
                      script = script.gsub(command_regex, command[:latex])
         | 
| 82 | 
            +
                      while before_script != script # loop 2
         | 
| 83 | 
            +
                        matched_count += 1
         | 
| 84 | 
            +
                        before_script = script
         | 
| 85 | 
            +
                        script = script.gsub(command_regex, command[:latex])
         | 
| 86 | 
            +
                      end
         | 
| 87 | 
            +
                    end
         | 
| 88 | 
            +
                    break if matched_count == 0
         | 
| 89 | 
            +
                  end
         | 
| 77 90 |  | 
| 78 91 | 
             
                  return script
         | 
| 79 92 | 
             
                end
         | 
| 80 93 |  | 
| 81 | 
            -
                 | 
| 82 | 
            -
                def post_sanitize(script)
         | 
| 83 | 
            -
                  # 2개 이상의 공백을 하나의 공백으로 치환
         | 
| 84 | 
            -
                  script = script.gsub(/\s+/, " ").strip
         | 
| 85 | 
            -
                end
         | 
| 86 | 
            -
             | 
| 87 | 
            -
                def replace_simple_commands(script)
         | 
| 94 | 
            +
                def replace_default_commands(script)
         | 
| 88 95 | 
             
                  right_term_group_name = "rt"
         | 
| 89 96 |  | 
| 90 | 
            -
                  @ | 
| 97 | 
            +
                  @default_commands.each do |command|
         | 
| 91 98 | 
             
                    right_term_regex = RIGHT_TERM_REGEX % [right_term_group_name, right_term_group_name]
         | 
| 92 | 
            -
                    command_regex = %r((?<![a-zA-Z\\])(?i:#{command[: | 
| 99 | 
            +
                    command_regex = %r((?<![a-zA-Z\\])(?i:#{command[:regex]})\s*#{right_term_regex})
         | 
| 93 100 |  | 
| 94 101 | 
             
                    match_data = script.match(command_regex)
         | 
| 95 102 | 
             
                    while match_data
         | 
| @@ -106,7 +113,7 @@ module HwpScriptToLatex | |
| 106 113 |  | 
| 107 114 | 
             
                def replace_block_commands(script)
         | 
| 108 115 | 
             
                  @block_commands.each do |command|
         | 
| 109 | 
            -
                    command_regex = %r((?<![a-zA-Z])(?i:#{command[: | 
| 116 | 
            +
                    command_regex = %r((?<![a-zA-Z])(?i:#{command[:regex]})\s*(?<block_content>{(?>[^{}]+|(?:\g<block_content>))*}))
         | 
| 110 117 |  | 
| 111 118 | 
             
                    match_data = script.match(command_regex)
         | 
| 112 119 | 
             
                    while match_data
         | 
| @@ -123,7 +130,7 @@ module HwpScriptToLatex | |
| 123 130 | 
             
                end
         | 
| 124 131 |  | 
| 125 132 | 
             
                def replace_keywords(script)
         | 
| 126 | 
            -
                  keywords = @ | 
| 133 | 
            +
                  keywords = @meta + @symbols + @reserved_words
         | 
| 127 134 | 
             
                  matched_count = 0
         | 
| 128 135 |  | 
| 129 136 | 
             
                  # 1star(2star(3starcdotsstar(99star100))cdots)
         | 
| @@ -135,7 +142,7 @@ module HwpScriptToLatex | |
| 135 142 | 
             
                  loop do # loop 1
         | 
| 136 143 | 
             
                    matched_count = 0
         | 
| 137 144 | 
             
                    keywords.each do |keyword|
         | 
| 138 | 
            -
                      keyword_regex =  | 
| 145 | 
            +
                      keyword_regex = rule_regex(keyword)
         | 
| 139 146 | 
             
                      before_script = script
         | 
| 140 147 | 
             
                      script = script.gsub(keyword_regex, keyword[:latex])
         | 
| 141 148 | 
             
                      while before_script != script # loop 2
         | 
| @@ -201,22 +208,6 @@ module HwpScriptToLatex | |
| 201 208 | 
             
                  return script
         | 
| 202 209 | 
             
                end
         | 
| 203 210 |  | 
| 204 | 
            -
                def get_keyword_regex(keyword)
         | 
| 205 | 
            -
                  regexes = []
         | 
| 206 | 
            -
                  if keyword[:regex].class == Array
         | 
| 207 | 
            -
                    regexes = keyword[:regex]
         | 
| 208 | 
            -
                  else
         | 
| 209 | 
            -
                    regexes << keyword[:regex]
         | 
| 210 | 
            -
                  end
         | 
| 211 | 
            -
             | 
| 212 | 
            -
                  is_alphabetic = keyword[:alphabetic].nil? ? true : keyword[:alphabetic]
         | 
| 213 | 
            -
                  if is_alphabetic
         | 
| 214 | 
            -
                    return %r((?<![a-zA-Z\\])(#{regexes.join('|')}))
         | 
| 215 | 
            -
                  else
         | 
| 216 | 
            -
                    return %r((?<![\\])(#{regexes.join('|')}))
         | 
| 217 | 
            -
                  end
         | 
| 218 | 
            -
                end
         | 
| 219 | 
            -
             | 
| 220 211 | 
             
                def decorate_displaystyle(script)
         | 
| 221 212 | 
             
                  script = script.gsub(/\\sum/, "\\displaystyle \\sum")
         | 
| 222 213 | 
             
                  script = script.gsub(/\\int/, "\\displaystyle \\int")
         | 
| @@ -0,0 +1,34 @@ | |
| 1 | 
            +
            #
         | 
| 2 | 
            +
            #  Author: osh
         | 
| 3 | 
            +
            #  Created: 2019-05-27
         | 
| 4 | 
            +
            #  Last modified: 2019-05-27
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            module HwpScriptToLatex
         | 
| 7 | 
            +
              class Processor
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                # 수식 문자열 전처리
         | 
| 10 | 
            +
                def pre_process(script)
         | 
| 11 | 
            +
                  # 족보닷컴 텍스트 제거
         | 
| 12 | 
            +
                  jokbo_regex = %r(from\s*=+\s*(?:족보닷컴[\s\S]*?)=+)
         | 
| 13 | 
            +
                  script = script.gsub(jokbo_regex, "")
         | 
| 14 | 
            +
                  # 2개 이상의 공백을 하나의 공백으로 치환
         | 
| 15 | 
            +
                  script = script.gsub(/\s+/, " ").strip
         | 
| 16 | 
            +
                  # 백슬래시(로만체)를 로만체 명령어로 변환
         | 
| 17 | 
            +
                  script = script.gsub(/^\\| \\/, " \\rm ")
         | 
| 18 | 
            +
                  # 꺽쇠 치환
         | 
| 19 | 
            +
            script = script.gsub(/&lt;/, "<")
         | 
| 20 | 
            +
            script = script.gsub(/&gt;/, ">")
         | 
| 21 | 
            +
                  # 위, 아래 첨자 명령어로 변경
         | 
| 22 | 
            +
                  script = script.gsub(/_/, " sub ")
         | 
| 23 | 
            +
                  script = script.gsub(/\^/, " sup ")
         | 
| 24 | 
            +
             | 
| 25 | 
            +
                  return script
         | 
| 26 | 
            +
                end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                # 수식 문자열 후처리
         | 
| 29 | 
            +
                def post_process(script)
         | 
| 30 | 
            +
                  # 2개 이상의 공백을 하나의 공백으로 치환
         | 
| 31 | 
            +
                  script = script.gsub(/\s+/, " ").strip
         | 
| 32 | 
            +
                end
         | 
| 33 | 
            +
              end
         | 
| 34 | 
            +
            end
         |