tex_to_unicode 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3fe364a184364e8e45c9ca5bd2c68cabe453f793283cc38071c897b40351ef7a
4
- data.tar.gz: 6e22bdffa2357e2d06f673ee326c0c2541f7967aed09a45c59e1e6a196b31108
3
+ metadata.gz: 0d252355bf7d6843ae911db512e147434cad4d9e27232b7fc490be36b5347fd9
4
+ data.tar.gz: b440147cc001af9fae4dff17197c3c1fa42a97aaa047d4a79ab7ba59d680d7a3
5
5
  SHA512:
6
- metadata.gz: bb9aafedc15f45a9fc13031edc3d52b467e76259375707c06a335c3b59ff7f15b8b7d36106cb92270a36546ab8dfd6ee1aa6ece1350a92f4fa6310547083b7c5
7
- data.tar.gz: 9dd89a04e9585dd505eb12717a24930c97957e73b14e0883833117d7376283e0fcaca96ea9be76cd7363818184fbf866ccf38a07f80d0a3457393fba3896682a
6
+ metadata.gz: 6b9701d7934b7c210ce07b1970e4eda2c84eb02b436356cc41e9751927b00e67f35f3bddebf770ddd1cfe62262eb35139c53e82a7b07ba7ffa0e54f394e19b9c
7
+ data.tar.gz: 93e1cfb198af6380791ad579b5ca133a7527cf3055f66260b35f2d818a9032973d89fd618e46ae6298834db15eb1b53afca40d68baf495c73cc79a8af06f08eb
data/README.md CHANGED
@@ -42,7 +42,7 @@ tex_to_unicode 'A \rightarrow B \Rightarrow C'
42
42
 
43
43
  # Integrals and sums
44
44
  tex_to_unicode '\int_0^\infty x^2 dx'
45
- # Output: ∫₀^∞ x² dx
45
+ # Output: ∫₀∞ x² dx
46
46
 
47
47
  # Set notation
48
48
  tex_to_unicode 'x \in \mathbb{R}, y \notin \emptyset'
@@ -65,9 +65,9 @@ result = TexToUnicode.convert('\alpha + \beta = \gamma')
65
65
  puts result # => α + β = γ
66
66
 
67
67
  # Use in string interpolation
68
- formula = '\sum_{i=1}^n i = \frac{n(n+1)}{2}'
68
+ formula = '\sum_{i=1}^n i'
69
69
  puts "The formula is: #{TexToUnicode.convert(formula)}"
70
- # Output: The formula is: ∑ᵢ₌₁ⁿ i = n(n+1)/2
70
+ # Output: The formula is: ∑i=1ⁿ i
71
71
  ```
72
72
 
73
73
  ## Supported Symbols
@@ -102,9 +102,11 @@ The gem supports a wide range of TeX symbols including:
102
102
  - `\emptyset` (∅), `\therefore` (∴), `\because` (∵)
103
103
 
104
104
  ### Superscripts & Subscripts
105
- - `^0` through `^9` (⁰¹²³⁴⁵⁶⁷⁸⁹)
106
- - `_0` through `_9` (₀₁₂₃₄₅₆₇₈₉)
107
- - `^+`, `^-`, `^(`, `^)` and subscript equivalents
105
+ - Superscripts: `^0` through `^9` (⁰¹²³⁴⁵⁶⁷⁸⁹), `^i` (ⁱ), `^n` (ⁿ)
106
+ - Superscript symbols: `^+` (⁺), `^-` (⁻), `^=` (⁼), `^(` (⁽), `^)` (⁾)
107
+ - Subscripts: `_0` through `_9` (₀₁₂₃₄₅₆₇₈₉)
108
+ - Subscript symbols: `_+` (₊), `_-` (₋), `_=` (₌), `_(` (₍), `_)` (₎)
109
+ - Note: Unicode has limited super/subscript characters; unsupported characters will display normally
108
110
 
109
111
  ### Brackets
110
112
  - `\langle`, `\rangle` (⟨⟩)
@@ -125,7 +127,7 @@ tex_to_unicode 'x = \frac{-b \pm \sqrt{b^2 - 4ac}}{2a}'
125
127
 
126
128
  # Euler's identity
127
129
  tex_to_unicode 'e^{i\pi} + 1 = 0'
128
- # Output: eⁱᵖⁱ + 1 = 0
130
+ # Output: eⁱπ + 1 = 0
129
131
 
130
132
  # Set theory
131
133
  tex_to_unicode 'A \cup B = \{x : x \in A \lor x \in B\}'
@@ -142,7 +144,11 @@ While Unicode provides many mathematical symbols, some TeX constructs cannot be
142
144
  - Complex fractions are approximated
143
145
  - Matrices and arrays have limited support
144
146
  - Some accents and diacritics are approximated
145
- - Subscripts and superscripts support limited characters
147
+ - **Subscripts and superscripts support limited characters** due to Unicode constraints:
148
+ - **Supported superscripts**: digits (⁰¹²³⁴⁵⁶⁷⁸⁹), letters i and n (ⁱⁿ), and symbols ⁺⁻⁼⁽⁾
149
+ - **Supported subscripts**: digits (₀₁₂₃₄₅₆₇₈₉) and symbols ₊₋₌₍₎
150
+ - Unsupported characters (like `^\infty`, `^\alpha`, `_\beta`) will have the `^` or `_` marker removed and display as regular characters
151
+ - Example: `\int_0^\infty` becomes `∫₀∞` (not `∫₀^∞` with superscript infinity, as Unicode has no superscript ∞)
146
152
 
147
153
  ## Development
148
154
 
@@ -117,11 +117,21 @@ module TexToUnicode
117
117
  def self.convert(text)
118
118
  result = text.dup
119
119
 
120
+ # Remove braces used for grouping first (before conversions)
121
+ # This allows super/subscripts within braces to be processed
122
+ result.gsub!(/\{([^}]*)\}/) { $1 }
123
+
120
124
  # Sort by length (descending) to match longer patterns first
121
125
  TEX_TO_UNICODE.keys.sort_by { |k| -k.length }.each do |tex|
122
126
  result.gsub!(tex, TEX_TO_UNICODE[tex])
123
127
  end
124
128
 
129
+ # Remove unsupported superscript/subscript markers
130
+ # If a ^ or _ is still present after conversion, it means that character
131
+ # doesn't have a Unicode super/subscript equivalent, so we remove the marker
132
+ result.gsub!(/\^(.)/) { $1 } # Remove ^ before any remaining character
133
+ result.gsub!(/_(.)/) { $1 } # Remove _ before any remaining character
134
+
125
135
  result
126
136
  end
127
137
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tex_to_unicode
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Powell