latex-decode 0.2.0 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +9 -5
- data/Gemfile +1 -1
- data/features/punctuation.feature +9 -6
- data/features/symbols.feature +21 -0
- data/latex-decode.gemspec +2 -3
- data/lib/latex/decode.rb +1 -1
- data/lib/latex/decode/base.rb +11 -8
- data/lib/latex/decode/compatibility.rb +18 -10
- data/lib/latex/decode/maths.rb +1 -1
- data/lib/latex/decode/punctuation.rb +1 -0
- data/lib/latex/decode/symbols.rb +4 -2
- data/lib/latex/decode/version.rb +1 -1
- metadata +10 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 9192df4a57585120960f5b8f6e2aba62bd900cdd8c649a15cefca04f4819f2f3
|
4
|
+
data.tar.gz: 6d692566a583b9bb70a67763c6c803f4b7e98c2e9c50b006a720cee30b39c72d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6a52d3db67061ed7c206cfa9b52d99ea181ff02bc2be8a21b6b55ebab19f8528565f3b4a882cef8c3b84f0319b99c63e5eef16d7c779ed80fc3405871bf95379
|
7
|
+
data.tar.gz: 0ac36cc1adaeff8d919ab78dc98fcecaadb663519ae6c3bd201327041a704c7e24b170f1143d700be8b999bb1561eb4c36e723b8aaf640fc4f78fb2b9ebfa5c8
|
data/.travis.yml
CHANGED
@@ -2,14 +2,18 @@ language: ruby
|
|
2
2
|
bundler_args: --without debug
|
3
3
|
sudo: false
|
4
4
|
cache: bundler
|
5
|
+
before_install:
|
6
|
+
- gem update --system
|
7
|
+
- gem update bundler
|
5
8
|
rvm:
|
6
|
-
-
|
7
|
-
- 2.
|
8
|
-
- 2.
|
9
|
-
- 2.
|
9
|
+
- 2.3
|
10
|
+
- 2.4
|
11
|
+
- 2.5
|
12
|
+
- 2.6
|
13
|
+
- 2.7
|
10
14
|
- jruby-19mode
|
11
15
|
- jruby-head
|
12
|
-
|
16
|
+
#- rbx-2
|
13
17
|
notifications:
|
14
18
|
email:
|
15
19
|
recipients:
|
data/Gemfile
CHANGED
@@ -15,7 +15,7 @@ group :debug do
|
|
15
15
|
gem 'rubinius-compiler', :require => false, :platforms => :rbx
|
16
16
|
end
|
17
17
|
|
18
|
-
gem 'unicode', '~> 0.4', :platforms => [:mri, :rbx, :mswin, :mingw]
|
18
|
+
gem 'unicode', '~> 0.4', :platforms => [:mri, :rbx, :mswin, :mingw] if RUBY_VERSION < '2.4'
|
19
19
|
gem 'ritex', '~> 1.0.1'
|
20
20
|
|
21
21
|
gem 'rubysl', '~> 2.0', :platforms => :rbx
|
@@ -29,15 +29,18 @@ Feature: Decode LaTeX punctuation directives
|
|
29
29
|
| \\textoverline | ‾ |
|
30
30
|
| \\langle | ⟨ |
|
31
31
|
| \\rangle | ⟩ |
|
32
|
+
| \\textquotesingle | ’ |
|
32
33
|
|
33
34
|
|
34
35
|
Scenarios: Punctuation symbols
|
35
|
-
| latex
|
36
|
-
| -
|
37
|
-
| --
|
38
|
-
| ---
|
39
|
-
| \\~{}
|
40
|
-
|
|
36
|
+
| latex | unicode | description |
|
37
|
+
| - | - | hyphen |
|
38
|
+
| -- | – | en-dash |
|
39
|
+
| --- | — | em-dash |
|
40
|
+
| \\~{} | ~ | tilde |
|
41
|
+
| \\textasciitilde{} | ~ | tilde |
|
42
|
+
| \\textasciitilde | ~ | tilde |
|
43
|
+
| X\\ X | X X | space |
|
41
44
|
|
42
45
|
Scenarios: Quotation marks
|
43
46
|
| latex | unicode | description |
|
@@ -0,0 +1,21 @@
|
|
1
|
+
Feature: Decode LaTeX symbol directives
|
2
|
+
As a hacker who works with LaTeX
|
3
|
+
I want to be able to decode LaTeX symbol directives
|
4
|
+
|
5
|
+
Scenario Outline: LaTeX to Unicode transformation
|
6
|
+
When I decode the string '<latex>'
|
7
|
+
Then the result should be '<unicode>'
|
8
|
+
|
9
|
+
Scenarios: Arrows
|
10
|
+
| latex | unicode |
|
11
|
+
| \\leftarrow | ← |
|
12
|
+
| \\uparrow | ↑ |
|
13
|
+
| \\downarrow | ↓ |
|
14
|
+
| \\rightarrow | → |
|
15
|
+
|
16
|
+
Scenarios: Whitespace
|
17
|
+
| latex | unicode | description |
|
18
|
+
| x\\,x | x x | small space |
|
19
|
+
| x~x | x x | non-breaking space |
|
20
|
+
| ~x | x | non-breaking space |
|
21
|
+
|
data/latex-decode.gemspec
CHANGED
@@ -12,12 +12,11 @@ Gem::Specification.new do |s|
|
|
12
12
|
s.homepage = 'http://github.com/inukshuk/latex-decode'
|
13
13
|
s.summary = 'Decodes LaTeX to Unicode.'
|
14
14
|
s.description = 'Decodes strings formatted in LaTeX to equivalent Unicode strings.'
|
15
|
-
s.license = 'GPL-3'
|
15
|
+
s.license = 'GPL-3.0'
|
16
16
|
|
17
17
|
if RUBY_PLATFORM =~ /java/
|
18
18
|
s.platform = 'java'
|
19
19
|
else
|
20
|
-
s.add_dependency('unicode', '~> 0.4')
|
21
20
|
s.platform = 'ruby'
|
22
21
|
end
|
23
22
|
|
@@ -30,4 +29,4 @@ Gem::Specification.new do |s|
|
|
30
29
|
s.extra_rdoc_files = %w{README.md LICENSE}
|
31
30
|
end
|
32
31
|
|
33
|
-
# vim: syntax=ruby
|
32
|
+
# vim: syntax=ruby
|
data/lib/latex/decode.rb
CHANGED
data/lib/latex/decode/base.rb
CHANGED
@@ -7,7 +7,7 @@ module LaTeX
|
|
7
7
|
class << self
|
8
8
|
attr_reader :patterns, :map
|
9
9
|
|
10
|
-
def inherited
|
10
|
+
def inherited(base)
|
11
11
|
subclasses << base
|
12
12
|
end
|
13
13
|
|
@@ -15,11 +15,11 @@ module LaTeX
|
|
15
15
|
@subclasses ||= []
|
16
16
|
end
|
17
17
|
|
18
|
-
def decode
|
18
|
+
def decode(string)
|
19
19
|
decode!(string.dup)
|
20
20
|
end
|
21
21
|
|
22
|
-
def decode!
|
22
|
+
def decode!(string)
|
23
23
|
patterns.each do |pattern|
|
24
24
|
string.gsub!(pattern) { |m| [$2,map[$1],$3].compact.join }
|
25
25
|
end
|
@@ -32,22 +32,25 @@ module LaTeX
|
|
32
32
|
|
33
33
|
module_function
|
34
34
|
|
35
|
-
def normalize
|
35
|
+
def normalize(string)
|
36
36
|
string.gsub!(/\\(?:i|j)\b/) { |m| m == '\\i' ? 'ı' : 'ȷ' }
|
37
37
|
|
38
|
-
|
38
|
+
# \foo\ bar -> \foo{} bar
|
39
39
|
string.gsub!(/(\\[a-zA-Z]+)\\(\s+)/, '\1{}\2')
|
40
40
|
|
41
|
-
|
41
|
+
# Aaaa\o, -> Aaaa\o{},
|
42
42
|
string.gsub!(/([^{]\\\w)([;,.:%])/, '\1{}\2')
|
43
43
|
|
44
|
-
|
44
|
+
# \c cb -> \c{cb}
|
45
45
|
string.gsub!(/(\\[^\sij&#\$\{\}_~%])\s+([[:alpha:]]+)\b/i, '\1{\2}')
|
46
46
|
|
47
|
+
# non-breaking spaces
|
48
|
+
string.gsub!(/(\A|[^\\])~/, LaTeX.to_unicode("\\1\u00a0"))
|
49
|
+
|
47
50
|
string
|
48
51
|
end
|
49
52
|
|
50
|
-
def strip_braces
|
53
|
+
def strip_braces(string)
|
51
54
|
string.gsub!(/(^|[^\\])([\{\}]+)/, '\1')
|
52
55
|
string.gsub!(/\\(\{|\})/, '\1')
|
53
56
|
string
|
@@ -33,27 +33,35 @@ if RUBY_PLATFORM == 'java'
|
|
33
33
|
end
|
34
34
|
|
35
35
|
else
|
36
|
-
|
37
|
-
require 'unicode'
|
38
|
-
|
39
|
-
# Use the Unicode gem
|
36
|
+
if RUBY_VERSION >= '2.3'
|
40
37
|
module LaTeX
|
41
38
|
def self.normalize_C(string)
|
42
|
-
|
39
|
+
string.unicode_normalize(:nfc)
|
43
40
|
end
|
44
41
|
end
|
45
|
-
|
42
|
+
else
|
46
43
|
begin
|
47
|
-
require '
|
44
|
+
require 'unicode'
|
48
45
|
|
49
|
-
# Use
|
46
|
+
# Use the Unicode gem
|
50
47
|
module LaTeX
|
51
48
|
def self.normalize_C(string)
|
52
|
-
|
49
|
+
Unicode::normalize_C(string)
|
53
50
|
end
|
54
51
|
end
|
55
52
|
rescue LoadError
|
56
|
-
|
53
|
+
begin
|
54
|
+
require 'active_support/multibyte/chars'
|
55
|
+
|
56
|
+
# Use ActiveSupport's normalizer
|
57
|
+
module LaTeX
|
58
|
+
def self.normalize_C(string)
|
59
|
+
ActiveSupport::Multibyte::Chars.new(string).normalize(:c).to_s
|
60
|
+
end
|
61
|
+
end
|
62
|
+
rescue LoadError
|
63
|
+
fail "Failed to load unicode normalizer: please gem install unicode (or active_support)"
|
64
|
+
end
|
57
65
|
end
|
58
66
|
end
|
59
67
|
end
|
data/lib/latex/decode/maths.rb
CHANGED
data/lib/latex/decode/symbols.rb
CHANGED
@@ -4,7 +4,8 @@ module LaTeX
|
|
4
4
|
module Decode
|
5
5
|
|
6
6
|
class Symbols < Decoder
|
7
|
-
|
7
|
+
|
8
|
+
@map = Hash[*%W{
|
8
9
|
textcolonmonetary ₡
|
9
10
|
textlira ₤
|
10
11
|
textnaira ₦
|
@@ -211,7 +212,8 @@ module LaTeX
|
|
211
212
|
tone1 ˩
|
212
213
|
ss ß
|
213
214
|
, \u2009
|
214
|
-
}].freeze
|
215
|
+
}.map { |s| LaTeX.to_unicode(s) }].freeze
|
216
|
+
|
215
217
|
|
216
218
|
@patterns = [
|
217
219
|
/\\(#{ map.keys.map { |k| Regexp.escape(k) }.join('|') })(?:\{\}|\s+|\b)/ou
|
data/lib/latex/decode/version.rb
CHANGED
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: latex-decode
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sylvester Keil
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
12
|
-
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: unicode
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0.4'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0.4'
|
11
|
+
date: 2020-10-14 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
27
13
|
description: Decodes strings formatted in LaTeX to equivalent Unicode strings.
|
28
14
|
email:
|
29
15
|
- http://sylvester.keil.or.at
|
@@ -49,6 +35,7 @@ files:
|
|
49
35
|
- features/special_characters.feature
|
50
36
|
- features/step_definitions/latex.rb
|
51
37
|
- features/support/env.rb
|
38
|
+
- features/symbols.feature
|
52
39
|
- features/umlauts.feature
|
53
40
|
- latex-decode.gemspec
|
54
41
|
- lib/latex/decode.rb
|
@@ -63,9 +50,9 @@ files:
|
|
63
50
|
- lib/latex/decode/version.rb
|
64
51
|
homepage: http://github.com/inukshuk/latex-decode
|
65
52
|
licenses:
|
66
|
-
- GPL-3
|
53
|
+
- GPL-3.0
|
67
54
|
metadata: {}
|
68
|
-
post_install_message:
|
55
|
+
post_install_message:
|
69
56
|
rdoc_options:
|
70
57
|
- "--line-numbers"
|
71
58
|
- "--inline-source"
|
@@ -87,9 +74,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
87
74
|
- !ruby/object:Gem::Version
|
88
75
|
version: '0'
|
89
76
|
requirements: []
|
90
|
-
|
91
|
-
|
92
|
-
signing_key:
|
77
|
+
rubygems_version: 3.1.2
|
78
|
+
signing_key:
|
93
79
|
specification_version: 4
|
94
80
|
summary: Decodes LaTeX to Unicode.
|
95
81
|
test_files:
|
@@ -102,5 +88,5 @@ test_files:
|
|
102
88
|
- features/special_characters.feature
|
103
89
|
- features/step_definitions/latex.rb
|
104
90
|
- features/support/env.rb
|
91
|
+
- features/symbols.feature
|
105
92
|
- features/umlauts.feature
|
106
|
-
has_rdoc:
|