truty 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/truty +6 -1
- data/lib/truty.rb +3 -1
- data/lib/truty/czech.rb +29 -32
- data/lib/truty/english.rb +37 -0
- data/lib/truty/french.rb +36 -0
- data/lib/truty/general.rb +31 -49
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4feee35354a4825efaf7b001e6e75d5da250d16d
|
4
|
+
data.tar.gz: b026eb59f352ba1eb611a19eac14d87b4b2d426f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 362609e41ac202bcfb3f46be87e9b6caeb8f6c82b424086feca76222b54bfb9aa60690fd5c0a27909feb58a7f26fb3bd468a72608af6b31125ef4500f0200c49
|
7
|
+
data.tar.gz: 100ae5a99c6a5552a8c99aec4444e5dfdd820fba072066b0105675db41003fee0ced281181f82fe0459e71926384a45715f10769996c2c611c76ba1fe484faa1
|
data/bin/truty
CHANGED
data/lib/truty.rb
CHANGED
@@ -3,6 +3,8 @@
|
|
3
3
|
require 'uri'
|
4
4
|
require 'text/hyphen'
|
5
5
|
require 'truty/general'
|
6
|
+
require 'truty/english'
|
7
|
+
require 'truty/french'
|
6
8
|
require 'truty/czech'
|
7
9
|
|
8
10
|
# A Ruby library which is a simple string converter, which aims to fix all the typography imperfections of the plain text.
|
@@ -10,7 +12,7 @@ require 'truty/czech'
|
|
10
12
|
module Truty
|
11
13
|
|
12
14
|
extend General
|
13
|
-
extend Czech
|
15
|
+
extend English, French, Czech
|
14
16
|
|
15
17
|
end
|
16
18
|
|
data/lib/truty/czech.rb
CHANGED
@@ -5,52 +5,33 @@ module Truty
|
|
5
5
|
# @author Matěj Kašpar Jirásek
|
6
6
|
module Czech
|
7
7
|
|
8
|
-
# Improves the typography of
|
9
|
-
#
|
10
|
-
# @param input [String] The text which will be converted.
|
11
|
-
# @return [String] Text with improved typography.
|
12
|
-
def fix_czech_text(input)
|
13
|
-
input.split("\n").collect { |p| fix_czech_paragraph(p) }.join("\n")
|
14
|
-
end
|
15
|
-
|
16
|
-
# Improves the Czech typography of single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {#fix_czech_text}.
|
8
|
+
# Improves the Czech typography of single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {General#fix}.
|
17
9
|
#
|
18
10
|
# @param input [String] The paragraph which will be converted.
|
19
11
|
# @return [String] Paragraph with improved typography.
|
20
|
-
def
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
output = endash_spaces(output)
|
29
|
-
output = fix_quotes(output, "\"", "„", "“")
|
30
|
-
output = fix_quotes(output, "'", "‚", "‘")
|
31
|
-
output = fix_multiplication_sign(output)
|
32
|
-
output = fix_space_between_numbers(output)
|
33
|
-
output = fix_units(output)
|
34
|
-
output = fix_trailing_spaces(output)
|
35
|
-
output = fix_widows(output)
|
36
|
-
output = fix_long_czech_numbers(output)
|
37
|
-
output = fix_czech_one_character_words(output)
|
38
|
-
output = fix_czech_abbreviations(output)
|
12
|
+
def czech(input)
|
13
|
+
input = soft_hyphens(input, "cs")
|
14
|
+
input = general(input)
|
15
|
+
input = czech_double_quotes(input)
|
16
|
+
input = czech_single_quotes(input)
|
17
|
+
input = czech_long_numbers(input)
|
18
|
+
input = czech_prepositions(input)
|
19
|
+
input = czech_abbreviations(input)
|
39
20
|
end
|
40
21
|
|
41
22
|
# Adds non-breaking space after Czech one character preposition.
|
42
23
|
#
|
43
24
|
# @param input [String] The paragraph which will be converted.
|
44
25
|
# @return [String] Paragraph with non-breaking spaces after prepositions.
|
45
|
-
def
|
46
|
-
input.gsub(/(\s+|^|\A)(([aikosuvz]\s+)+)/i) {
|
26
|
+
def czech_prepositions(input)
|
27
|
+
input.gsub(/(\s+|^|\A)(([aikosuvz]\s+)+)/i) { $1 + $2.gsub(/\s+/, " ") }
|
47
28
|
end
|
48
29
|
|
49
30
|
# Divides long numbers into parts of three digits using thin space.
|
50
31
|
#
|
51
32
|
# @param input [String] The paragraph which will be converted.
|
52
33
|
# @return [String] Paragraph with spaces inside of long numbers.
|
53
|
-
def
|
34
|
+
def czech_long_numbers(input)
|
54
35
|
input.gsub(/\d+/) { |n| n.reverse.scan(/(.{1,3})/).join(' ').reverse }
|
55
36
|
end
|
56
37
|
|
@@ -58,10 +39,26 @@ module Truty
|
|
58
39
|
#
|
59
40
|
# @param input [String] The paragraph which will be converted.
|
60
41
|
# @return [String] Paragraph with non-breaking spaces in and after abbreviations.
|
61
|
-
def
|
42
|
+
def czech_abbreviations(input)
|
62
43
|
abbreviations = /(a. s.|abl. |absol. |adj. |adm. |adv. |aj.|ak. |ak. sl.|akt. |alch. |amer. |anat. |angl. |anglosas. |ap.|apod.|arab. |arch. |archit. |arg. |arm. gen. |astr. |astrol. |atd.|atp.|att. |b. k.|Bc. |BcA. |belg. |bibl. |biol. |bl. |boh. |bot. |br. |brig. gen. |brit. |bulh. |bás. |býv. |chcsl. |chem. |chil. |CSc. |csl. |círk. |dat. |dep. |des. |dial. |DiS.|dl. |doc. |dol. |dop. |dopr. |dosl. |dán. |dór. |děj. |dět. |ekon. |epic. |etnonym. |eufem. |ev. |event. |f. |fam. |fem. |fil. |film. |fin. |form. |fot. |fr. |fut. |fyz. |gen. |genmjr. |genplk. |genpor. |geogr. |geol. |geom. |germ. |gram. |hebr. |herald. |hist. |hl. |hod. |hor. |horn. |hovor. |hud. |hut. |ie. |imp. |impf. |ind. |indoevr. |inf. |Ing. |instr. |interj. |iron. |it. |ión. |j. č.|jap. |JUDr. |k. s.|kanad. |katalán. |klas. |kniž. |komp. |konj. |konkr. |kpt. |kr. |kuch. |kř. |lat. |les. |lid. |lit. |liturg. |log. |lok. |lék. |m. |mat. |meteor. |metr. |MgA. |Mgr. |mil. |mj. |mjr. |ml. |mld. |mn. č.|mod. |ms. |MUDr. |MVDr. |mysl. |n. |n. l.|např. |neklas. |nesklon. |než. |niz. |nom. |nor. |npor. |nprap. |nrtm. |nstržm. |náb. |nám. |námoř. |něm. |o. p. s.|o. s.|ob. |obch. |obyč. |odd. |odp. |ojed. |opt. |p. |p. n. l.|p. o.|P. S. |P. T. |part. |pas. |pejor. |pers. |pf. |PharmDr. |PhDr. |pl. |plk. |plpf. |po Kr.|pol. |pomn. |popř. |por. |pplk. |ppor. |pprap. |prap. |prep. |prof. |práv. |př. Kr.|př. n. l.|před n. l.|předl. |přivl. |r. |rak. |rcsl. |refl. |reg. |resp. |rkp. |RNDr. |roč. |RSDr. |rtm. |rtn. |rum. |rus. |s. |s. p.|s. r. o.|samohl. |Sb. |sg. |sl. |slang. |slov. |souhl. |spec. |spol. s r. o.|sport. |srov. |st. |stfr. |stol. |str. |stržm. |stsl. |střv. |subj. |subst. |superl. |sv. |svob. |sz. |t. r.|tech. |telev. |teol. |ThDr. |tis. |tj. |trans. |tur. |typogr. |tzn. |tzv. |táz. |v z.|v. o. s.|v. r.|v. v. i.|var. |vedl. |verb. |vl. jm. |voj. |vok. |vulg. |vztaž. |výtv. |vč. |vůb. |z. s.|zahr. |zast. |zejm. |zeměd. |zkr. |zn. |zvl. |zájm. |zř. |č. |č. j.|č. p. |čas. |čes. |čet. |čj. |čp. 
|čín. |čís. |ř. |řec. |říj. |škpt. |špan. |šprap. |št. prap. |švýc. )/i
|
63
44
|
input.gsub(abbreviations) { |abbr| abbr.gsub(/ /, ' ') }
|
64
45
|
end
|
65
46
|
|
47
|
+
# Converts single quotes to the typographic ones.
|
48
|
+
#
|
49
|
+
# @param input [String] The paragraph which will be converted.
|
50
|
+
# @return [String] Paragraph with correct single quotes.
|
51
|
+
def czech_single_quotes(input)
|
52
|
+
quotes(input, "'", "‚", "‘")
|
53
|
+
end
|
54
|
+
|
55
|
+
# Converts double quotes to the typographic ones.
|
56
|
+
#
|
57
|
+
# @param input [String] The paragraph which will be converted.
|
58
|
+
# @return [String] Paragraph with correct double quotes.
|
59
|
+
def czech_double_quotes(input)
|
60
|
+
quotes(input, "\"", "„", "”")
|
61
|
+
end
|
62
|
+
|
66
63
|
end
|
67
64
|
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
|
2
|
+
module Truty
|
3
|
+
|
4
|
+
# Module with specific English typography fixes.
|
5
|
+
# @author Matěj Kašpar Jirásek
|
6
|
+
module English
|
7
|
+
|
8
|
+
# Improves the English typography of single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {General#fix}.
|
9
|
+
#
|
10
|
+
# @param input [String] The paragraph which will be converted.
|
11
|
+
# @param country [String] The country ("uk" or "us").
|
12
|
+
# @return [String] Paragraph with improved typography.
|
13
|
+
def english(input, country = "us")
|
14
|
+
input = soft_hyphens(input, "en_" + country)
|
15
|
+
input = general(input)
|
16
|
+
input = english_double_quotes(input)
|
17
|
+
input = english_single_quotes(input)
|
18
|
+
end
|
19
|
+
|
20
|
+
# Converts single quotes to the typographic ones.
|
21
|
+
#
|
22
|
+
# @param input [String] The paragraph which will be converted.
|
23
|
+
# @return [String] Paragraph with correct single quotes.
|
24
|
+
def english_single_quotes(input)
|
25
|
+
quotes(input, "'", "‘", "’")
|
26
|
+
end
|
27
|
+
|
28
|
+
# Converts double quotes to the typographic ones.
|
29
|
+
#
|
30
|
+
# @param input [String] The paragraph which will be converted.
|
31
|
+
# @return [String] Paragraph with correct double quotes.
|
32
|
+
def english_double_quotes(input)
|
33
|
+
quotes(input)
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
37
|
+
end
|
data/lib/truty/french.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
|
2
|
+
module Truty
|
3
|
+
|
4
|
+
# Module with specific French typography fixes.
|
5
|
+
# @author Matěj Kašpar Jirásek
|
6
|
+
module French
|
7
|
+
|
8
|
+
# Improves the French typography of single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {General#fix}.
|
9
|
+
#
|
10
|
+
# @param input [String] The paragraph which will be converted.
|
11
|
+
# @return [String] Paragraph with improved typography.
|
12
|
+
def french(input)
|
13
|
+
input = soft_hyphens(input, "fr")
|
14
|
+
input = general(input)
|
15
|
+
input = french_double_quotes(input)
|
16
|
+
input = french_single_quotes(input)
|
17
|
+
end
|
18
|
+
|
19
|
+
# Converts single quotes to the typographic ones.
|
20
|
+
#
|
21
|
+
# @param input [String] The paragraph which will be converted.
|
22
|
+
# @return [String] Paragraph with correct single quotes.
|
23
|
+
def french_single_quotes(input)
|
24
|
+
quotes(input, "'", "‹ ", " ›")
|
25
|
+
end
|
26
|
+
|
27
|
+
# Converts double quotes to the typographic ones.
|
28
|
+
#
|
29
|
+
# @param input [String] The paragraph which will be converted.
|
30
|
+
# @return [String] Paragraph with correct double quotes.
|
31
|
+
def french_double_quotes(input)
|
32
|
+
quotes(input, "\"", "« ", " »")
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
end
|
data/lib/truty/general.rb
CHANGED
@@ -8,33 +8,31 @@ module Truty
|
|
8
8
|
# Improves the typography of the large plain text with paragraphs. Adds non-breaking spaces, hyphenation, fixes dashes, etc.
|
9
9
|
#
|
10
10
|
# @param input [String] The text which will be converted.
|
11
|
-
# @param lang [
|
11
|
+
# @param lang [Symbol] Sets the language (english name like "czech", "german", etc.)
|
12
12
|
# @return [String] Text with improved typography.
|
13
|
-
def fix(input, lang =
|
14
|
-
|
13
|
+
def fix(input, lang = :general)
|
14
|
+
if not Truty.respond_to? lang then
|
15
|
+
lang = :general
|
16
|
+
end
|
17
|
+
input.split("\n").collect { |p| Truty.send lang, p }.join("\n")
|
15
18
|
end
|
16
19
|
|
17
|
-
# Improves
|
20
|
+
# Improves basic non-language specific issues in typography.
|
18
21
|
#
|
19
22
|
# @param input [String] The paragraph which will be converted.
|
20
|
-
# @param lang [String] Sets the language of hyphenation. (See {#add_soft_hyphens}.)
|
21
23
|
# @return [String] Paragraph with improved typography.
|
22
|
-
def
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
output = fix_space_between_numbers(output)
|
35
|
-
output = fix_units(output)
|
36
|
-
output = fix_trailing_spaces(output)
|
37
|
-
output = fix_widows(output)
|
24
|
+
def general(input)
|
25
|
+
input = ellipsis(input)
|
26
|
+
input = multicharacters(input)
|
27
|
+
input = punctuation_whitespace(input)
|
28
|
+
input = brackets_whitespace(input)
|
29
|
+
input = emdash(input)
|
30
|
+
input = endash(input)
|
31
|
+
input = multiplication_sign(input)
|
32
|
+
input = space_between_numbers(input)
|
33
|
+
input = units(input)
|
34
|
+
input = trailing_spaces(input)
|
35
|
+
input = widows(input)
|
38
36
|
end
|
39
37
|
|
40
38
|
# Converts three or more periods (dots, points) into ellipsis.
|
@@ -49,7 +47,7 @@ module Truty
|
|
49
47
|
#
|
50
48
|
# @param input [String] The paragraph which will be converted.
|
51
49
|
# @return [String] Paragraph with corrected emdashes.
|
52
|
-
def
|
50
|
+
def emdash(input)
|
53
51
|
input.gsub(/\s+(—|-{2,3})\s+/, " — ")
|
54
52
|
end
|
55
53
|
|
@@ -57,7 +55,7 @@ module Truty
|
|
57
55
|
#
|
58
56
|
# @param input [String] The paragraph which will be converted.
|
59
57
|
# @return [String] Paragraph with corrected endashes.
|
60
|
-
def
|
58
|
+
def endash(input)
|
61
59
|
input.gsub(/\s+(–|-)\s+/, " – ")
|
62
60
|
end
|
63
61
|
|
@@ -69,7 +67,7 @@ module Truty
|
|
69
67
|
# @param right [Integer] Number of characters on the beginning of the words which cannot be hyphenated.
|
70
68
|
# @param char [String] The character which will be added to hyphenation places.
|
71
69
|
# @return [String] Paragraph with added hyphenation characters.
|
72
|
-
def
|
70
|
+
def soft_hyphens(input, lang = "en_us", left = 2, right = 2, char = "")
|
73
71
|
l = Text::Hyphen.new(:language => lang, :left => left, :right => right)
|
74
72
|
words = input.split(/[ ]+/m)
|
75
73
|
result = []
|
@@ -89,32 +87,16 @@ module Truty
|
|
89
87
|
# @param start_quotes [String] The character used for starting quotes.
|
90
88
|
# @param end_quotes [String] The character used for ending quotes.
|
91
89
|
# @return [String] Paragraph with correct double quotes.
|
92
|
-
def
|
90
|
+
def quotes(input, type = '"', start_quotes = "“", end_quotes = "”")
|
93
91
|
regexp = Regexp.new(type + '[^' + type + ']*' + type)
|
94
92
|
input.gsub(regexp) { |s| start_quotes + s[1..-2].strip + end_quotes }
|
95
93
|
end
|
96
94
|
|
97
|
-
# Converts single quotes to the typograhic ones.
|
98
|
-
#
|
99
|
-
# @param input [String] The paragraph which will be converted.
|
100
|
-
# @return [String] Paragraph with correct single quotes.
|
101
|
-
def fix_single_quotes(input)
|
102
|
-
fix_quotes(input, "'", "‘", "’")
|
103
|
-
end
|
104
|
-
|
105
|
-
# Converts double quotes to the typograhic ones.
|
106
|
-
#
|
107
|
-
# @param input [String] The paragraph which will be converted.
|
108
|
-
# @return [String] Paragraph with correct double quotes.
|
109
|
-
def fix_double_quotes(input)
|
110
|
-
fix_quotes(input, '"', "“", "”")
|
111
|
-
end
|
112
|
-
|
113
95
|
# Adds multiplication sign between numbers instead of X.
|
114
96
|
#
|
115
97
|
# @param input [String] The paragraph which will be converted.
|
116
98
|
# @return [String] Paragraph with correct multiplication signs.
|
117
|
-
def
|
99
|
+
def multiplication_sign(input)
|
118
100
|
output = input.gsub(/(\d+)\s{0,1}[Xx]\s{0,1}(\d+)/, '\1 × \2')
|
119
101
|
output = output.gsub(/(\d+)[Xx]/, '\1×')
|
120
102
|
end
|
@@ -123,7 +105,7 @@ module Truty
|
|
123
105
|
#
|
124
106
|
# @param input [String] The paragraph which will be converted.
|
125
107
|
# @return [String] Paragraph with correct spaces between numbers.
|
126
|
-
def
|
108
|
+
def space_between_numbers(input)
|
127
109
|
input.gsub(/(\d)\s+(\d)/, '\1 \2')
|
128
110
|
end
|
129
111
|
|
@@ -131,7 +113,7 @@ module Truty
|
|
131
113
|
#
|
132
114
|
# @param input [String] The paragraph which will be converted.
|
133
115
|
# @return [String] Paragraph with correct spaces around brackets.
|
134
|
-
def
|
116
|
+
def brackets_whitespace(input)
|
135
117
|
output = input.gsub(/([\(\[\{])\s*/, '\1')
|
136
118
|
output = output.gsub(/\s*([\]\)\}])/, '\1')
|
137
119
|
output = output.gsub(/\s+([\(\[\{])\s*/, ' \1')
|
@@ -142,7 +124,7 @@ module Truty
|
|
142
124
|
#
|
143
125
|
# @param input [String] The paragraph which will be converted.
|
144
126
|
# @return [String] Paragraph with converted characters.
|
145
|
-
def
|
127
|
+
def multicharacters(input)
|
146
128
|
output = input.gsub(/\([Cc]\)/, "©")
|
147
129
|
output = output.gsub(/\([Pp]\)/, "℗")
|
148
130
|
output = output.gsub(/\([Rr]\)/, "®")
|
@@ -159,7 +141,7 @@ module Truty
|
|
159
141
|
#
|
160
142
|
# @param input [String] The paragraph which will be converted.
|
161
143
|
# @return [String] Paragraph with correct spaces around punctuation.
|
162
|
-
def
|
144
|
+
def punctuation_whitespace(input)
|
163
145
|
input.gsub(/\s*([\!\?\.,;:…]+)\s*/, '\1 ')
|
164
146
|
end
|
165
147
|
|
@@ -167,7 +149,7 @@ module Truty
|
|
167
149
|
#
|
168
150
|
# @param input [String] The paragraph which will be converted.
|
169
151
|
# @return [String] Paragraph with correct spaces between number and unit.
|
170
|
-
def
|
152
|
+
def units(input)
|
171
153
|
output = input.gsub(/(\d+)\s+(%|‰|‱|℃|℉|°|€|Kč|(Y|Z|E|P|T|G|M|k|h|da|d|m|µ|n|p|f|a|z|y)?(m(²|³)?|g|s|h|A|K|cd|mol|Ω|℃|℉))/, '\1 \2')
|
172
154
|
output.gsub(/(\*|§|#|†)\s+(\d+)/, '\1 \2')
|
173
155
|
end
|
@@ -176,7 +158,7 @@ module Truty
|
|
176
158
|
#
|
177
159
|
# @param input [String] The paragraph which will be converted.
|
178
160
|
# @return [String] Paragraph with removed widows.
|
179
|
-
def
|
161
|
+
def widows(input)
|
180
162
|
input.gsub(/(\s)(\S+(\$|\z))/, ' \2')
|
181
163
|
end
|
182
164
|
|
@@ -184,7 +166,7 @@ module Truty
|
|
184
166
|
#
|
185
167
|
# @param input [String] The paragraph which will be converted.
|
186
168
|
# @return [String] Paragraph without trailing spaces.
|
187
|
-
def
|
169
|
+
def trailing_spaces(input)
|
188
170
|
input.gsub(/\s*($|\z)/, '')
|
189
171
|
end
|
190
172
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: truty
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matěj Kašpar Jirásek
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-01-
|
11
|
+
date: 2015-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: text-hyphen
|
@@ -66,9 +66,10 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0.8'
|
69
|
-
description: A string converter
|
69
|
+
description: A string converter aiming to correct the typography of plain text.
|
70
70
|
email: matej.jirasek@me.com
|
71
|
-
executables:
|
71
|
+
executables:
|
72
|
+
- truty
|
72
73
|
extensions: []
|
73
74
|
extra_rdoc_files: []
|
74
75
|
files:
|
@@ -77,6 +78,8 @@ files:
|
|
77
78
|
- bin/truty
|
78
79
|
- lib/truty.rb
|
79
80
|
- lib/truty/czech.rb
|
81
|
+
- lib/truty/english.rb
|
82
|
+
- lib/truty/french.rb
|
80
83
|
- lib/truty/general.rb
|
81
84
|
homepage: https://github.com/mkj-is/Truty
|
82
85
|
licenses:
|