tex_to_unicode 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +14 -8
- data/lib/tex_to_unicode/converter.rb +10 -0
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0d252355bf7d6843ae911db512e147434cad4d9e27232b7fc490be36b5347fd9
|
|
4
|
+
data.tar.gz: b440147cc001af9fae4dff17197c3c1fa42a97aaa047d4a79ab7ba59d680d7a3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6b9701d7934b7c210ce07b1970e4eda2c84eb02b436356cc41e9751927b00e67f35f3bddebf770ddd1cfe62262eb35139c53e82a7b07ba7ffa0e54f394e19b9c
|
|
7
|
+
data.tar.gz: 93e1cfb198af6380791ad579b5ca133a7527cf3055f66260b35f2d818a9032973d89fd618e46ae6298834db15eb1b53afca40d68baf495c73cc79a8af06f08eb
|
data/README.md
CHANGED
|
@@ -42,7 +42,7 @@ tex_to_unicode 'A \rightarrow B \Rightarrow C'
|
|
|
42
42
|
|
|
43
43
|
# Integrals and sums
|
|
44
44
|
tex_to_unicode '\int_0^\infty x^2 dx'
|
|
45
|
-
# Output:
|
|
45
|
+
# Output: ∫₀∞ x² dx
|
|
46
46
|
|
|
47
47
|
# Set notation
|
|
48
48
|
tex_to_unicode 'x \in \mathbb{R}, y \notin \emptyset'
|
|
@@ -65,9 +65,9 @@ result = TexToUnicode.convert('\alpha + \beta = \gamma')
|
|
|
65
65
|
puts result # => α + β = γ
|
|
66
66
|
|
|
67
67
|
# Use in string interpolation
|
|
68
|
-
formula = '\sum_{i=1}^n i
|
|
68
|
+
formula = '\sum_{i=1}^n i'
|
|
69
69
|
puts "The formula is: #{TexToUnicode.convert(formula)}"
|
|
70
|
-
# Output: The formula is:
|
|
70
|
+
# Output: The formula is: ∑i=1ⁿ i
|
|
71
71
|
```
|
|
72
72
|
|
|
73
73
|
## Supported Symbols
|
|
@@ -102,9 +102,11 @@ The gem supports a wide range of TeX symbols including:
|
|
|
102
102
|
- `\emptyset` (∅), `\therefore` (∴), `\because` (∵)
|
|
103
103
|
|
|
104
104
|
### Superscripts & Subscripts
|
|
105
|
-
- `^0` through `^9` (⁰¹²³⁴⁵⁶⁷⁸⁹)
|
|
106
|
-
- `
|
|
107
|
-
-
|
|
105
|
+
- Superscripts: `^0` through `^9` (⁰¹²³⁴⁵⁶⁷⁸⁹), `^i` (ⁱ), `^n` (ⁿ)
|
|
106
|
+
- Superscript symbols: `^+` (⁺), `^-` (⁻), `^=` (⁼), `^(` (⁽), `^)` (⁾)
|
|
107
|
+
- Subscripts: `_0` through `_9` (₀₁₂₃₄₅₆₇₈₉)
|
|
108
|
+
- Subscript symbols: `_+` (₊), `_-` (₋), `_=` (₌), `_(` (₍), `_)` (₎)
|
|
109
|
+
- Note: Unicode has limited super/subscript characters; unsupported characters will display normally
|
|
108
110
|
|
|
109
111
|
### Brackets
|
|
110
112
|
- `\langle`, `\rangle` (⟨⟩)
|
|
@@ -125,7 +127,7 @@ tex_to_unicode 'x = \frac{-b \pm \sqrt{b^2 - 4ac}}{2a}'
|
|
|
125
127
|
|
|
126
128
|
# Euler's identity
|
|
127
129
|
tex_to_unicode 'e^{i\pi} + 1 = 0'
|
|
128
|
-
# Output: e
|
|
130
|
+
# Output: eⁱπ + 1 = 0
|
|
129
131
|
|
|
130
132
|
# Set theory
|
|
131
133
|
tex_to_unicode 'A \cup B = \{x : x \in A \lor x \in B\}'
|
|
@@ -142,7 +144,11 @@ While Unicode provides many mathematical symbols, some TeX constructs cannot be
|
|
|
142
144
|
- Complex fractions are approximated
|
|
143
145
|
- Matrices and arrays have limited support
|
|
144
146
|
- Some accents and diacritics are approximated
|
|
145
|
-
- Subscripts and superscripts support limited characters
|
|
147
|
+
- **Subscripts and superscripts support limited characters** due to Unicode constraints:
|
|
148
|
+
- **Supported superscripts**: digits (⁰¹²³⁴⁵⁶⁷⁸⁹), letters i and n (ⁱⁿ), and symbols ⁺⁻⁼⁽⁾
|
|
149
|
+
- **Supported subscripts**: digits (₀₁₂₃₄₅₆₇₈₉) and symbols ₊₋₌₍₎
|
|
150
|
+
- Unsupported characters (like `^\infty`, `^\alpha`, `_\beta`) will have the `^` or `_` marker removed and display as regular characters
|
|
151
|
+
- Example: `\int_0^\infty` becomes `∫₀∞` (not `∫₀^∞` with superscript infinity, as Unicode has no superscript ∞)
|
|
146
152
|
|
|
147
153
|
## Development
|
|
148
154
|
|
data/lib/tex_to_unicode/converter.rb
CHANGED
|
@@ -117,11 +117,21 @@ module TexToUnicode
|
|
|
117
117
|
def self.convert(text)
|
|
118
118
|
result = text.dup
|
|
119
119
|
|
|
120
|
+
# Remove braces used for grouping first (before conversions)
|
|
121
|
+
# This allows super/subscripts within braces to be processed
|
|
122
|
+
result.gsub!(/\{([^}]*)\}/) { $1 }
|
|
123
|
+
|
|
120
124
|
# Sort by length (descending) to match longer patterns first
|
|
121
125
|
TEX_TO_UNICODE.keys.sort_by { |k| -k.length }.each do |tex|
|
|
122
126
|
result.gsub!(tex, TEX_TO_UNICODE[tex])
|
|
123
127
|
end
|
|
124
128
|
|
|
129
|
+
# Remove unsupported superscript/subscript markers
|
|
130
|
+
# If a ^ or _ is still present after conversion, it means that character
|
|
131
|
+
# doesn't have a Unicode super/subscript equivalent, so we remove the marker
|
|
132
|
+
result.gsub!(/\^(.)/) { $1 } # Remove ^ before any remaining character
|
|
133
|
+
result.gsub!(/_(.)/) { $1 } # Remove _ before any remaining character
|
|
134
|
+
|
|
125
135
|
result
|
|
126
136
|
end
|
|
127
137
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: tex_to_unicode
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.1
|
|
4
|
+
version: 0.1.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Thomas Powell
|
|
@@ -29,8 +29,8 @@ homepage: https://github.com/stringsn88keys/tex_to_unicode
|
|
|
29
29
|
licenses:
|
|
30
30
|
- MIT
|
|
31
31
|
metadata:
|
|
32
|
-
source_code_uri: https://github.com/
|
|
33
|
-
bug_tracker_uri: https://github.com/
|
|
32
|
+
source_code_uri: https://github.com/stringsn88keys/tex_to_unicode
|
|
33
|
+
bug_tracker_uri: https://github.com/stringsn88keys/tex_to_unicode/issues
|
|
34
34
|
post_install_message:
|
|
35
35
|
rdoc_options: []
|
|
36
36
|
require_paths:
|