truty 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/truty +6 -1
- data/lib/truty.rb +3 -1
- data/lib/truty/czech.rb +29 -32
- data/lib/truty/english.rb +37 -0
- data/lib/truty/french.rb +36 -0
- data/lib/truty/general.rb +31 -49
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4feee35354a4825efaf7b001e6e75d5da250d16d
|
4
|
+
data.tar.gz: b026eb59f352ba1eb611a19eac14d87b4b2d426f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 362609e41ac202bcfb3f46be87e9b6caeb8f6c82b424086feca76222b54bfb9aa60690fd5c0a27909feb58a7f26fb3bd468a72608af6b31125ef4500f0200c49
|
7
|
+
data.tar.gz: 100ae5a99c6a5552a8c99aec4444e5dfdd820fba072066b0105675db41003fee0ced281181f82fe0459e71926384a45715f10769996c2c611c76ba1fe484faa1
|
data/bin/truty
CHANGED
data/lib/truty.rb
CHANGED
@@ -3,6 +3,8 @@
|
|
3
3
|
require 'uri'
|
4
4
|
require 'text/hyphen'
|
5
5
|
require 'truty/general'
|
6
|
+
require 'truty/english'
|
7
|
+
require 'truty/french'
|
6
8
|
require 'truty/czech'
|
7
9
|
|
8
10
|
# A Ruby library which is a simple string converter, which aims to fix all the typography imperfections of the plain text.
|
@@ -10,7 +12,7 @@ require 'truty/czech'
|
|
10
12
|
module Truty
|
11
13
|
|
12
14
|
extend General
|
13
|
-
extend Czech
|
15
|
+
extend English, French, Czech
|
14
16
|
|
15
17
|
end
|
16
18
|
|
data/lib/truty/czech.rb
CHANGED
@@ -5,52 +5,33 @@ module Truty
|
|
5
5
|
# @author Matěj Kašpar Jirásek
|
6
6
|
module Czech
|
7
7
|
|
8
|
-
# Improves the typography of
|
9
|
-
#
|
10
|
-
# @param input [String] The text which will be converted.
|
11
|
-
# @return [String] Text with improved typography.
|
12
|
-
def fix_czech_text(input)
|
13
|
-
input.split("\n").collect { |p| fix_czech_paragraph(p) }.join("\n")
|
14
|
-
end
|
15
|
-
|
16
|
-
# Improves the Czech typography of single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {#fix_czech_text}.
|
8
|
+
# Improves the Czech typography of a single paragraph. If you supply more than one paragraph, you might lose some improvements such as widow handling. For improving longer text see {General#fix}.
|
17
9
|
#
|
18
10
|
# @param input [String] The paragraph which will be converted.
|
19
11
|
# @return [String] Paragraph with improved typography.
|
20
|
-
def
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
output = endash_spaces(output)
|
29
|
-
output = fix_quotes(output, "\"", "„", "“")
|
30
|
-
output = fix_quotes(output, "'", "‚", "‘")
|
31
|
-
output = fix_multiplication_sign(output)
|
32
|
-
output = fix_space_between_numbers(output)
|
33
|
-
output = fix_units(output)
|
34
|
-
output = fix_trailing_spaces(output)
|
35
|
-
output = fix_widows(output)
|
36
|
-
output = fix_long_czech_numbers(output)
|
37
|
-
output = fix_czech_one_character_words(output)
|
38
|
-
output = fix_czech_abbreviations(output)
|
12
|
+
def czech(input)
|
13
|
+
input = soft_hyphens(input, "cs")
|
14
|
+
input = general(input)
|
15
|
+
input = czech_double_quotes(input)
|
16
|
+
input = czech_single_quotes(input)
|
17
|
+
input = czech_long_numbers(input)
|
18
|
+
input = czech_prepositions(input)
|
19
|
+
input = czech_abbreviations(input)
|
39
20
|
end
|
40
21
|
|
41
22
|
# Adds a non-breaking space after Czech one-character prepositions.
|
42
23
|
#
|
43
24
|
# @param input [String] The paragraph which will be converted.
|
44
25
|
# @return [String] Paragraph with non-breaking spaces after prepositions.
|
45
|
-
def
|
46
|
-
input.gsub(/(\s+|^|\A)(([aikosuvz]\s+)+)/i) {
|
26
|
+
def czech_prepositions(input)
|
27
|
+
input.gsub(/(\s+|^|\A)(([aikosuvz]\s+)+)/i) { $1 + $2.gsub(/\s+/, " ") }
|
47
28
|
end
|
48
29
|
|
49
30
|
# Divides long numbers into parts of three digits using thin space.
|
50
31
|
#
|
51
32
|
# @param input [String] The paragraph which will be converted.
|
52
33
|
# @return [String] Paragraph with spaces inside of long numbers.
|
53
|
-
def
|
34
|
+
def czech_long_numbers(input)
|
54
35
|
input.gsub(/\d+/) { |n| n.reverse.scan(/(.{1,3})/).join(' ').reverse }
|
55
36
|
end
|
56
37
|
|
@@ -58,10 +39,26 @@ module Truty
|
|
58
39
|
#
|
59
40
|
# @param input [String] The paragraph which will be converted.
|
60
41
|
# @return [String] Paragraph with non-breaking spaces in and after abbreviations.
|
61
|
-
def
|
42
|
+
def czech_abbreviations(input)
|
62
43
|
abbreviations = /(a. s.|abl. |absol. |adj. |adm. |adv. |aj.|ak. |ak. sl.|akt. |alch. |amer. |anat. |angl. |anglosas. |ap.|apod.|arab. |arch. |archit. |arg. |arm. gen. |astr. |astrol. |atd.|atp.|att. |b. k.|Bc. |BcA. |belg. |bibl. |biol. |bl. |boh. |bot. |br. |brig. gen. |brit. |bulh. |bás. |býv. |chcsl. |chem. |chil. |CSc. |csl. |círk. |dat. |dep. |des. |dial. |DiS.|dl. |doc. |dol. |dop. |dopr. |dosl. |dán. |dór. |děj. |dět. |ekon. |epic. |etnonym. |eufem. |ev. |event. |f. |fam. |fem. |fil. |film. |fin. |form. |fot. |fr. |fut. |fyz. |gen. |genmjr. |genplk. |genpor. |geogr. |geol. |geom. |germ. |gram. |hebr. |herald. |hist. |hl. |hod. |hor. |horn. |hovor. |hud. |hut. |ie. |imp. |impf. |ind. |indoevr. |inf. |Ing. |instr. |interj. |iron. |it. |ión. |j. č.|jap. |JUDr. |k. s.|kanad. |katalán. |klas. |kniž. |komp. |konj. |konkr. |kpt. |kr. |kuch. |kř. |lat. |les. |lid. |lit. |liturg. |log. |lok. |lék. |m. |mat. |meteor. |metr. |MgA. |Mgr. |mil. |mj. |mjr. |ml. |mld. |mn. č.|mod. |ms. |MUDr. |MVDr. |mysl. |n. |n. l.|např. |neklas. |nesklon. |než. |niz. |nom. |nor. |npor. |nprap. |nrtm. |nstržm. |náb. |nám. |námoř. |něm. |o. p. s.|o. s.|ob. |obch. |obyč. |odd. |odp. |ojed. |opt. |p. |p. n. l.|p. o.|P. S. |P. T. |part. |pas. |pejor. |pers. |pf. |PharmDr. |PhDr. |pl. |plk. |plpf. |po Kr.|pol. |pomn. |popř. |por. |pplk. |ppor. |pprap. |prap. |prep. |prof. |práv. |př. Kr.|př. n. l.|před n. l.|předl. |přivl. |r. |rak. |rcsl. |refl. |reg. |resp. |rkp. |RNDr. |roč. |RSDr. |rtm. |rtn. |rum. |rus. |s. |s. p.|s. r. o.|samohl. |Sb. |sg. |sl. |slang. |slov. |souhl. |spec. |spol. s r. o.|sport. |srov. |st. |stfr. |stol. |str. |stržm. |stsl. |střv. |subj. |subst. |superl. |sv. |svob. |sz. |t. r.|tech. |telev. |teol. |ThDr. |tis. |tj. |trans. |tur. |typogr. |tzn. |tzv. |táz. |v z.|v. o. s.|v. r.|v. v. i.|var. |vedl. |verb. |vl. jm. |voj. |vok. |vulg. |vztaž. |výtv. |vč. |vůb. |z. s.|zahr. |zast. |zejm. |zeměd. |zkr. |zn. |zvl. |zájm. |zř. |č. |č. j.|č. p. |čas. |čes. |čet. |čj. |čp. 
|čín. |čís. |ř. |řec. |říj. |škpt. |špan. |šprap. |št. prap. |švýc. )/i
|
63
44
|
input.gsub(abbreviations) { |abbr| abbr.gsub(/ /, ' ') }
|
64
45
|
end
|
65
46
|
|
47
|
+
# Converts single quotes to the typographic ones.
|
48
|
+
#
|
49
|
+
# @param input [String] The paragraph which will be converted.
|
50
|
+
# @return [String] Paragraph with correct single quotes.
|
51
|
+
def czech_single_quotes(input)
|
52
|
+
quotes(input, "'", "‚", "‘")
|
53
|
+
end
|
54
|
+
|
55
|
+
# Converts double quotes to the typographic ones.
|
56
|
+
#
|
57
|
+
# @param input [String] The paragraph which will be converted.
|
58
|
+
# @return [String] Paragraph with correct double quotes.
|
59
|
+
def czech_double_quotes(input)
  # Czech opens with the low-9 quote (U+201E) and closes with the high-6
  # quote (U+201C). U+201D is the English closing quote and is wrong here.
  # Escapes are used because the two closing glyphs are easy to confuse.
  quotes(input, "\"", "\u201E", "\u201C")
end
|
62
|
+
|
66
63
|
end
|
67
64
|
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
|
2
|
+
module Truty
|
3
|
+
|
4
|
+
# Module with specific English typography fixes.
|
5
|
+
# @author Matěj Kašpar Jirásek
|
6
|
+
module English
|
7
|
+
|
8
|
+
# Improves the English typography of a single paragraph. If you supply more than one paragraph, you might lose some improvements such as widow handling. For improving longer text see {General#fix}.
|
9
|
+
#
|
10
|
+
# @param input [String] The paragraph which will be converted.
|
11
|
+
# @param country [String] The country ("uk" or "us").
|
12
|
+
# @return [String] Paragraph with improved typography.
|
13
|
+
def english(input, country = "us")
|
14
|
+
input = soft_hyphens(input, "en_" + country)
|
15
|
+
input = general(input)
|
16
|
+
input = english_double_quotes(input)
|
17
|
+
input = english_single_quotes(input)
|
18
|
+
end
|
19
|
+
|
20
|
+
# Converts single quotes to the typographic ones.
|
21
|
+
#
|
22
|
+
# @param input [String] The paragraph which will be converted.
|
23
|
+
# @return [String] Paragraph with correct single quotes.
|
24
|
+
def english_single_quotes(input)
|
25
|
+
quotes(input, "'", "‘", "’")
|
26
|
+
end
|
27
|
+
|
28
|
+
# Converts double quotes to the typographic ones.
|
29
|
+
#
|
30
|
+
# @param input [String] The paragraph which will be converted.
|
31
|
+
# @return [String] Paragraph with correct double quotes.
|
32
|
+
def english_double_quotes(input)
|
33
|
+
quotes(input)
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
37
|
+
end
|
data/lib/truty/french.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
|
2
|
+
module Truty
|
3
|
+
|
4
|
+
# Module with specific French typography fixes.
|
5
|
+
# @author Matěj Kašpar Jirásek
|
6
|
+
module French
|
7
|
+
|
8
|
+
# Improves the French typography of a single paragraph. If you supply more than one paragraph, you might lose some improvements such as widow handling. For improving longer text see {General#fix}.
|
9
|
+
#
|
10
|
+
# @param input [String] The paragraph which will be converted.
|
11
|
+
# @return [String] Paragraph with improved typography.
|
12
|
+
def french(input)
|
13
|
+
input = soft_hyphens(input, "fr")
|
14
|
+
input = general(input)
|
15
|
+
input = french_double_quotes(input)
|
16
|
+
input = french_single_quotes(input)
|
17
|
+
end
|
18
|
+
|
19
|
+
# Converts single quotes to the typographic ones.
|
20
|
+
#
|
21
|
+
# @param input [String] The paragraph which will be converted.
|
22
|
+
# @return [String] Paragraph with correct single quotes.
|
23
|
+
def french_single_quotes(input)
|
24
|
+
quotes(input, "'", "‹ ", " ›")
|
25
|
+
end
|
26
|
+
|
27
|
+
# Converts double quotes to the typographic ones.
|
28
|
+
#
|
29
|
+
# @param input [String] The paragraph which will be converted.
|
30
|
+
# @return [String] Paragraph with correct double quotes.
|
31
|
+
def french_double_quotes(input)
|
32
|
+
quotes(input, "\"", "« ", " »")
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
end
|
data/lib/truty/general.rb
CHANGED
@@ -8,33 +8,31 @@ module Truty
|
|
8
8
|
# Improves the typography of the large plain text with paragraphs. Adds non-breaking spaces, hyphenation, fixes dashes, etc.
|
9
9
|
#
|
10
10
|
# @param input [String] The text which will be converted.
|
11
|
-
# @param lang [
|
11
|
+
# @param lang [Symbol] Sets the language by its English name (e.g. :czech, :english or :french); unknown values fall back to :general.
|
12
12
|
# @return [String] Text with improved typography.
|
13
|
-
def fix(input, lang =
|
14
|
-
|
13
|
+
def fix(input, lang = :general)
  # `respond_to?` alone is not a safe guard: Truty also responds to inherited
  # public methods such as `instance_eval`, and dispatching to those would run
  # the input text as Ruby code. Accept only methods that Truty itself or one
  # of its own modules defines; everything else falls back to :general.
  if Truty.respond_to?(lang)
    owner = Truty.method(lang).owner
    own_method = owner == Truty.singleton_class || owner.name.to_s.start_with?("Truty::")
  else
    own_method = false
  end
  lang = :general unless own_method

  # Every line of the input is treated as one paragraph.
  input.split("\n").map { |paragraph| Truty.public_send(lang, paragraph) }.join("\n")
end
|
16
19
|
|
17
|
-
# Improves
|
20
|
+
# Improves basic non-language specific issues in typography.
|
18
21
|
#
|
19
22
|
# @param input [String] The paragraph which will be converted.
|
20
|
-
# @param lang [String] Sets the language of hyphenation. (See {#add_soft_hyphens}.)
|
21
23
|
# @return [String] Paragraph with improved typography.
|
22
|
-
def
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
output = fix_space_between_numbers(output)
|
35
|
-
output = fix_units(output)
|
36
|
-
output = fix_trailing_spaces(output)
|
37
|
-
output = fix_widows(output)
|
24
|
+
def general(input)
|
25
|
+
input = ellipsis(input)
|
26
|
+
input = multicharacters(input)
|
27
|
+
input = punctuation_whitespace(input)
|
28
|
+
input = brackets_whitespace(input)
|
29
|
+
input = emdash(input)
|
30
|
+
input = endash(input)
|
31
|
+
input = multiplication_sign(input)
|
32
|
+
input = space_between_numbers(input)
|
33
|
+
input = units(input)
|
34
|
+
input = trailing_spaces(input)
|
35
|
+
input = widows(input)
|
38
36
|
end
|
39
37
|
|
40
38
|
# Converts three or more periods (dots, points) into ellipsis.
|
@@ -49,7 +47,7 @@ module Truty
|
|
49
47
|
#
|
50
48
|
# @param input [String] The paragraph which will be converted.
|
51
49
|
# @return [String] Paragraph with corrected emdashes.
|
52
|
-
def
|
50
|
+
def emdash(input)
|
53
51
|
input.gsub(/\s+(—|-{2,3})\s+/, " — ")
|
54
52
|
end
|
55
53
|
|
@@ -57,7 +55,7 @@ module Truty
|
|
57
55
|
#
|
58
56
|
# @param input [String] The paragraph which will be converted.
|
59
57
|
# @return [String] Paragraph with corrected endashes.
|
60
|
-
def
|
58
|
+
def endash(input)
|
61
59
|
input.gsub(/\s+(–|-)\s+/, " – ")
|
62
60
|
end
|
63
61
|
|
@@ -69,7 +67,7 @@ module Truty
|
|
69
67
|
# @param right [Integer] Number of characters on the beginning of the words which cannot be hyphenated.
|
70
68
|
# @param char [String] The character which will be added to hyphenation places.
|
71
69
|
# @return [String] Paragraph with added hyphenation characters.
|
72
|
-
def
|
70
|
+
def soft_hyphens(input, lang = "en_us", left = 2, right = 2, char = "")
|
73
71
|
l = Text::Hyphen.new(:language => lang, :left => left, :right => right)
|
74
72
|
words = input.split(/[ ]+/m)
|
75
73
|
result = []
|
@@ -89,32 +87,16 @@ module Truty
|
|
89
87
|
# @param start_quotes [String] The character used for starting quotes.
|
90
88
|
# @param end_quotes [String] The character used for ending quotes.
|
91
89
|
# @return [String] Paragraph with correct double quotes.
|
92
|
-
def
|
90
|
+
def quotes(input, type = '"', start_quotes = "“", end_quotes = "”")
|
93
91
|
regexp = Regexp.new(type + '[^' + type + ']*' + type)
|
94
92
|
input.gsub(regexp) { |s| start_quotes + s[1..-2].strip + end_quotes }
|
95
93
|
end
|
96
94
|
|
97
|
-
# Converts single quotes to the typograhic ones.
|
98
|
-
#
|
99
|
-
# @param input [String] The paragraph which will be converted.
|
100
|
-
# @return [String] Paragraph with correct single quotes.
|
101
|
-
def fix_single_quotes(input)
|
102
|
-
fix_quotes(input, "'", "‘", "’")
|
103
|
-
end
|
104
|
-
|
105
|
-
# Converts double quotes to the typograhic ones.
|
106
|
-
#
|
107
|
-
# @param input [String] The paragraph which will be converted.
|
108
|
-
# @return [String] Paragraph with correct double quotes.
|
109
|
-
def fix_double_quotes(input)
|
110
|
-
fix_quotes(input, '"', "“", "”")
|
111
|
-
end
|
112
|
-
|
113
95
|
# Adds multiplication sign between numbers instead of X.
|
114
96
|
#
|
115
97
|
# @param input [String] The paragraph which will be converted.
|
116
98
|
# @return [String] Paragraph with correct multiplication signs.
|
117
|
-
def
|
99
|
+
def multiplication_sign(input)
|
118
100
|
output = input.gsub(/(\d+)\s{0,1}[Xx]\s{0,1}(\d+)/, '\1 × \2')
|
119
101
|
output = output.gsub(/(\d+)[Xx]/, '\1×')
|
120
102
|
end
|
@@ -123,7 +105,7 @@ module Truty
|
|
123
105
|
#
|
124
106
|
# @param input [String] The paragraph which will be converted.
|
125
107
|
# @return [String] Paragraph with correct spaces between numbers.
|
126
|
-
def
|
108
|
+
def space_between_numbers(input)
|
127
109
|
input.gsub(/(\d)\s+(\d)/, '\1 \2')
|
128
110
|
end
|
129
111
|
|
@@ -131,7 +113,7 @@ module Truty
|
|
131
113
|
#
|
132
114
|
# @param input [String] The paragraph which will be converted.
|
133
115
|
# @return [String] Paragraph with correct spaces around brackets.
|
134
|
-
def
|
116
|
+
def brackets_whitespace(input)
|
135
117
|
output = input.gsub(/([\(\[\{])\s*/, '\1')
|
136
118
|
output = output.gsub(/\s*([\]\)\}])/, '\1')
|
137
119
|
output = output.gsub(/\s+([\(\[\{])\s*/, ' \1')
|
@@ -142,7 +124,7 @@ module Truty
|
|
142
124
|
#
|
143
125
|
# @param input [String] The paragraph which will be converted.
|
144
126
|
# @return [String] Paragraph with converted characters.
|
145
|
-
def
|
127
|
+
def multicharacters(input)
|
146
128
|
output = input.gsub(/\([Cc]\)/, "©")
|
147
129
|
output = output.gsub(/\([Pp]\)/, "℗")
|
148
130
|
output = output.gsub(/\([Rr]\)/, "®")
|
@@ -159,7 +141,7 @@ module Truty
|
|
159
141
|
#
|
160
142
|
# @param input [String] The paragraph which will be converted.
|
161
143
|
# @return [String] Paragraph with correct spaces around punctuation.
|
162
|
-
def
|
144
|
+
def punctuation_whitespace(input)
|
163
145
|
input.gsub(/\s*([\!\?\.,;:…]+)\s*/, '\1 ')
|
164
146
|
end
|
165
147
|
|
@@ -167,7 +149,7 @@ module Truty
|
|
167
149
|
#
|
168
150
|
# @param input [String] The paragraph which will be converted.
|
169
151
|
# @return [String] Paragraph with correct spaces between number and unit.
|
170
|
-
def
|
152
|
+
def units(input)
|
171
153
|
output = input.gsub(/(\d+)\s+(%|‰|‱|℃|℉|°|€|Kč|(Y|Z|E|P|T|G|M|k|h|da|d|m|µ|n|p|f|a|z|y)?(m(²|³)?|g|s|h|A|K|cd|mol|Ω|℃|℉))/, '\1 \2')
|
172
154
|
output.gsub(/(\*|§|#|†)\s+(\d+)/, '\1 \2')
|
173
155
|
end
|
@@ -176,7 +158,7 @@ module Truty
|
|
176
158
|
#
|
177
159
|
# @param input [String] The paragraph which will be converted.
|
178
160
|
# @return [String] Paragraph with removed widows.
|
179
|
-
def
|
161
|
+
def widows(input)
|
180
162
|
input.gsub(/(\s)(\S+(\$|\z))/, ' \2')
|
181
163
|
end
|
182
164
|
|
@@ -184,7 +166,7 @@ module Truty
|
|
184
166
|
#
|
185
167
|
# @param input [String] The paragraph which will be converted.
|
186
168
|
# @return [String] Paragraph without trailing spaces.
|
187
|
-
def
|
169
|
+
def trailing_spaces(input)
|
188
170
|
input.gsub(/\s*($|\z)/, '')
|
189
171
|
end
|
190
172
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: truty
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matěj Kašpar Jirásek
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-01-
|
11
|
+
date: 2015-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: text-hyphen
|
@@ -66,9 +66,10 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0.8'
|
69
|
-
description: A string converter
|
69
|
+
description: A string converter aiming to correct the typography of plain text.
|
70
70
|
email: matej.jirasek@me.com
|
71
|
-
executables:
|
71
|
+
executables:
|
72
|
+
- truty
|
72
73
|
extensions: []
|
73
74
|
extra_rdoc_files: []
|
74
75
|
files:
|
@@ -77,6 +78,8 @@ files:
|
|
77
78
|
- bin/truty
|
78
79
|
- lib/truty.rb
|
79
80
|
- lib/truty/czech.rb
|
81
|
+
- lib/truty/english.rb
|
82
|
+
- lib/truty/french.rb
|
80
83
|
- lib/truty/general.rb
|
81
84
|
homepage: https://github.com/mkj-is/Truty
|
82
85
|
licenses:
|