truty 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1f40bb7fec5fbcc791d421f3815710ffea30736b
4
- data.tar.gz: a64fddfb5f0fec224737008005e4f3657b09c22a
3
+ metadata.gz: 4feee35354a4825efaf7b001e6e75d5da250d16d
4
+ data.tar.gz: b026eb59f352ba1eb611a19eac14d87b4b2d426f
5
5
  SHA512:
6
- metadata.gz: 14476727de9625d8872b21ab3134d441b662da6a4c9010d7756ac17ade34d500125b840a9273fda4c2971067c1f6990e115d8af1222760c36a7e43f73a2ea5c8
7
- data.tar.gz: bd59bad1b93812342cb70fba9435b2164cf10acbdaff7b6387c89a1a71606e4d67255770472d001e20bbdecc21e2767628867664dbf7f0847a07e5bef7c0c44a
6
+ metadata.gz: 362609e41ac202bcfb3f46be87e9b6caeb8f6c82b424086feca76222b54bfb9aa60690fd5c0a27909feb58a7f26fb3bd468a72608af6b31125ef4500f0200c49
7
+ data.tar.gz: 100ae5a99c6a5552a8c99aec4444e5dfdd820fba072066b0105675db41003fee0ced281181f82fe0459e71926384a45715f10769996c2c611c76ba1fe484faa1
data/bin/truty CHANGED
@@ -3,7 +3,12 @@
3
3
  require "truty"
4
4
 
5
5
  def main
6
- puts Truty.fix_czech_text(ARGF.read)
6
+ language = :general
7
+ if ARGV[0] == "-l" || ARGV[0] == "--language" then
8
+ language = ARGV[1]
9
+ ARGV.shift(2)
10
+ end
11
+ puts Truty.send :fix, ARGF.read, language
7
12
  end
8
13
 
9
14
  main
@@ -3,6 +3,8 @@
3
3
  require 'uri'
4
4
  require 'text/hyphen'
5
5
  require 'truty/general'
6
+ require 'truty/english'
7
+ require 'truty/french'
6
8
  require 'truty/czech'
7
9
 
8
10
  # A Ruby library which is a simple string converter, which aims to fix all the typography imperfections of the plain text.
@@ -10,7 +12,7 @@ require 'truty/czech'
10
12
  module Truty
11
13
 
12
14
  extend General
13
- extend Czech
15
+ extend English, French, Czech
14
16
 
15
17
  end
16
18
 
@@ -5,52 +5,33 @@ module Truty
5
5
  # @author Matěj Kašpar Jirásek
6
6
  module Czech
7
7
 
8
- # Improves the typography of the large plain text with paragraphs. Adds non-breaking spaces, hyphenation, fixes dashes, etc. Fixes some typography fixes specific for the Czech languages, like one character prepositions, abbreviations and spaces between numbers.
9
- #
10
- # @param input [String] The text which will be converted.
11
- # @return [String] Text with improved typography.
12
- def fix_czech_text(input)
13
- input.split("\n").collect { |p| fix_czech_paragraph(p) }.join("\n")
14
- end
15
-
16
- # Improves the Czech typography of single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {#fix_czech_text}.
8
+ # Improves the Czech typography of a single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {General#fix}.
17
9
  #
18
10
  # @param input [String] The paragraph which will be converted.
19
11
  # @return [String] Paragraph with improved typography.
20
- def fix_czech_paragraph(input)
21
- output = input
22
- output = ellipsis(output)
23
- output = fix_multicharacters(output)
24
- output = fix_punctuation_whitespace(output)
25
- output = fix_brackets_whitespace(output)
26
- output = add_soft_hyphens(output, "cs")
27
- output = emdash_spaces(output)
28
- output = endash_spaces(output)
29
- output = fix_quotes(output, "\"", "„", "“")
30
- output = fix_quotes(output, "'", "‚", "‘")
31
- output = fix_multiplication_sign(output)
32
- output = fix_space_between_numbers(output)
33
- output = fix_units(output)
34
- output = fix_trailing_spaces(output)
35
- output = fix_widows(output)
36
- output = fix_long_czech_numbers(output)
37
- output = fix_czech_one_character_words(output)
38
- output = fix_czech_abbreviations(output)
12
+ def czech(input)
13
+ input = soft_hyphens(input, "cs")
14
+ input = general(input)
15
+ input = czech_double_quotes(input)
16
+ input = czech_single_quotes(input)
17
+ input = czech_long_numbers(input)
18
+ input = czech_prepositions(input)
19
+ input = czech_abbreviations(input)
39
20
  end
40
21
 
41
22
  # Adds a non-breaking space after Czech one-character prepositions.
42
23
  #
43
24
  # @param input [String] The paragraph which will be converted.
44
25
  # @return [String] Paragraph with non-breaking spaces after prepositions.
45
- def fix_czech_one_character_words(input)
46
- input.gsub(/(\s+|^|\A)(([aikosuvz]\s+)+)/i) { |prep| $1 + $2.gsub(/\s+/, " ") }
26
+ def czech_prepositions(input)
27
+ input.gsub(/(\s+|^|\A)(([aikosuvz]\s+)+)/i) { $1 + $2.gsub(/\s+/, " ") }
47
28
  end
48
29
 
49
30
  # Divides long numbers into parts of three digits using thin space.
50
31
  #
51
32
  # @param input [String] The paragraph which will be converted.
52
33
  # @return [String] Paragraph with spaces inside of long numbers.
53
- def fix_long_czech_numbers(input)
34
+ def czech_long_numbers(input)
54
35
  input.gsub(/\d+/) { |n| n.reverse.scan(/(.{1,3})/).join(' ').reverse }
55
36
  end
56
37
 
@@ -58,10 +39,26 @@ module Truty
58
39
  #
59
40
  # @param input [String] The paragraph which will be converted.
60
41
  # @return [String] Paragraph with non-breaking spaces in and after abbreviations.
61
- def fix_czech_abbreviations(input)
42
+ def czech_abbreviations(input)
62
43
  abbreviations = /(a. s.|abl. |absol. |adj. |adm. |adv. |aj.|ak. |ak. sl.|akt. |alch. |amer. |anat. |angl. |anglosas. |ap.|apod.|arab. |arch. |archit. |arg. |arm. gen. |astr. |astrol. |atd.|atp.|att. |b. k.|Bc. |BcA. |belg. |bibl. |biol. |bl. |boh. |bot. |br. |brig. gen. |brit. |bulh. |bás. |býv. |chcsl. |chem. |chil. |CSc. |csl. |círk. |dat. |dep. |des. |dial. |DiS.|dl. |doc. |dol. |dop. |dopr. |dosl. |dán. |dór. |děj. |dět. |ekon. |epic. |etnonym. |eufem. |ev. |event. |f. |fam. |fem. |fil. |film. |fin. |form. |fot. |fr. |fut. |fyz. |gen. |genmjr. |genplk. |genpor. |geogr. |geol. |geom. |germ. |gram. |hebr. |herald. |hist. |hl. |hod. |hor. |horn. |hovor. |hud. |hut. |ie. |imp. |impf. |ind. |indoevr. |inf. |Ing. |instr. |interj. |iron. |it. |ión. |j. č.|jap. |JUDr. |k. s.|kanad. |katalán. |klas. |kniž. |komp. |konj. |konkr. |kpt. |kr. |kuch. |kř. |lat. |les. |lid. |lit. |liturg. |log. |lok. |lék. |m. |mat. |meteor. |metr. |MgA. |Mgr. |mil. |mj. |mjr. |ml. |mld. |mn. č.|mod. |ms. |MUDr. |MVDr. |mysl. |n. |n. l.|např. |neklas. |nesklon. |než. |niz. |nom. |nor. |npor. |nprap. |nrtm. |nstržm. |náb. |nám. |námoř. |něm. |o. p. s.|o. s.|ob. |obch. |obyč. |odd. |odp. |ojed. |opt. |p. |p. n. l.|p. o.|P. S. |P. T. |part. |pas. |pejor. |pers. |pf. |PharmDr. |PhDr. |pl. |plk. |plpf. |po Kr.|pol. |pomn. |popř. |por. |pplk. |ppor. |pprap. |prap. |prep. |prof. |práv. |př. Kr.|př. n. l.|před n. l.|předl. |přivl. |r. |rak. |rcsl. |refl. |reg. |resp. |rkp. |RNDr. |roč. |RSDr. |rtm. |rtn. |rum. |rus. |s. |s. p.|s. r. o.|samohl. |Sb. |sg. |sl. |slang. |slov. |souhl. |spec. |spol. s r. o.|sport. |srov. |st. |stfr. |stol. |str. |stržm. |stsl. |střv. |subj. |subst. |superl. |sv. |svob. |sz. |t. r.|tech. |telev. |teol. |ThDr. |tis. |tj. |trans. |tur. |typogr. |tzn. |tzv. |táz. |v z.|v. o. s.|v. r.|v. v. i.|var. |vedl. |verb. |vl. jm. |voj. |vok. |vulg. |vztaž. |výtv. |vč. |vůb. |z. s.|zahr. |zast. |zejm. |zeměd. |zkr. |zn. |zvl. |zájm. |zř. |č. |č. j.|č. p. |čas. |čes. |čet. |čj. |čp. 
|čín. |čís. |ř. |řec. |říj. |škpt. |špan. |šprap. |št. prap. |švýc. )/i
63
44
  input.gsub(abbreviations) { |abbr| abbr.gsub(/ /, ' ') }
64
45
  end
65
46
 
47
+ # Converts single quotes to the typographic ones.
48
+ #
49
+ # @param input [String] The paragraph which will be converted.
50
+ # @return [String] Paragraph with correct single quotes.
51
+ def czech_single_quotes(input)
52
+ quotes(input, "'", "‚", "‘")
53
+ end
54
+
55
+ # Converts double quotes to the typographic ones.
56
+ #
57
+ # @param input [String] The paragraph which will be converted.
58
+ # @return [String] Paragraph with correct double quotes.
59
+ def czech_double_quotes(input)
60
+ quotes(input, "\"", "„", "”")
61
+ end
62
+
66
63
  end
67
64
  end
@@ -0,0 +1,37 @@
1
+
2
+ module Truty
3
+
4
+ # Module with specific English typography fixes.
5
+ # @author Matěj Kašpar Jirásek
6
+ module English
7
+
8
+ # Improves the English typography of a single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {General#fix}.
9
+ #
10
+ # @param input [String] The paragraph which will be converted.
11
+ # @param country [String] The country ("uk" or "us").
12
+ # @return [String] Paragraph with improved typography.
13
+ def english(input, country = "us")
14
+ input = soft_hyphens(input, "en_" + country)
15
+ input = general(input)
16
+ input = english_double_quotes(input)
17
+ input = english_single_quotes(input)
18
+ end
19
+
20
+ # Converts single quotes to the typographic ones.
21
+ #
22
+ # @param input [String] The paragraph which will be converted.
23
+ # @return [String] Paragraph with correct single quotes.
24
+ def english_single_quotes(input)
25
+ quotes(input, "'", "‘", "’")
26
+ end
27
+
28
+ # Converts double quotes to the typographic ones.
29
+ #
30
+ # @param input [String] The paragraph which will be converted.
31
+ # @return [String] Paragraph with correct double quotes.
32
+ def english_double_quotes(input)
33
+ quotes(input)
34
+ end
35
+
36
+ end
37
+ end
@@ -0,0 +1,36 @@
1
+
2
+ module Truty
3
+
4
+ # Module with specific French typography fixes.
5
+ # @author Matěj Kašpar Jirásek
6
+ module French
7
+
8
+ # Improves the French typography of a single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {General#fix}.
9
+ #
10
+ # @param input [String] The paragraph which will be converted.
11
+ # @return [String] Paragraph with improved typography.
12
+ def french(input)
13
+ input = soft_hyphens(input, "fr")
14
+ input = general(input)
15
+ input = french_double_quotes(input)
16
+ input = french_single_quotes(input)
17
+ end
18
+
19
+ # Converts single quotes to the typographic ones.
20
+ #
21
+ # @param input [String] The paragraph which will be converted.
22
+ # @return [String] Paragraph with correct single quotes.
23
+ def french_single_quotes(input)
24
+ quotes(input, "'", "‹ ", " ›")
25
+ end
26
+
27
+ # Converts double quotes to the typographic ones.
28
+ #
29
+ # @param input [String] The paragraph which will be converted.
30
+ # @return [String] Paragraph with correct double quotes.
31
+ def french_double_quotes(input)
32
+ quotes(input, "\"", "« ", " »")
33
+ end
34
+
35
+ end
36
+ end
@@ -8,33 +8,31 @@ module Truty
8
8
  # Improves the typography of the large plain text with paragraphs. Adds non-breaking spaces, hyphenation, fixes dashes, etc.
9
9
  #
10
10
  # @param input [String] The text which will be converted.
11
- # @param lang [String] Sets the language of hyphenation. (See {#add_soft_hyphens}.)
11
+ # @param lang [Symbol] Sets the language (English name like "czech", "french", etc.). Falls back to :general when the language is not supported.
12
12
  # @return [String] Text with improved typography.
13
- def fix(input, lang = "en_us")
14
- input.split("\n").collect { |p| fix_paragraph(p, lang) }.join("\n")
13
+ def fix(input, lang = :general)
14
+ if not Truty.respond_to? lang then
15
+ lang = :general
16
+ end
17
+ input.split("\n").collect { |p| Truty.send lang, p }.join("\n")
15
18
  end
16
19
 
17
- # Improves the typography of single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {#fix}.
20
+ # Improves basic non-language specific issues in typography.
18
21
  #
19
22
  # @param input [String] The paragraph which will be converted.
20
- # @param lang [String] Sets the language of hyphenation. (See {#add_soft_hyphens}.)
21
23
  # @return [String] Paragraph with improved typography.
22
- def fix_paragraph(input, lang = "en_us")
23
- output = input
24
- output = ellipsis(output)
25
- output = fix_multicharacters(output)
26
- output = fix_punctuation_whitespace(output)
27
- output = fix_brackets_whitespace(output)
28
- output = add_soft_hyphens(output, lang)
29
- output = emdash_spaces(output)
30
- output = endash_spaces(output)
31
- output = fix_double_quotes(output)
32
- output = fix_single_quotes(output)
33
- output = fix_multiplication_sign(output)
34
- output = fix_space_between_numbers(output)
35
- output = fix_units(output)
36
- output = fix_trailing_spaces(output)
37
- output = fix_widows(output)
24
+ def general(input)
25
+ input = ellipsis(input)
26
+ input = multicharacters(input)
27
+ input = punctuation_whitespace(input)
28
+ input = brackets_whitespace(input)
29
+ input = emdash(input)
30
+ input = endash(input)
31
+ input = multiplication_sign(input)
32
+ input = space_between_numbers(input)
33
+ input = units(input)
34
+ input = trailing_spaces(input)
35
+ input = widows(input)
38
36
  end
39
37
 
40
38
  # Converts three or more periods (dots, points) into ellipsis.
@@ -49,7 +47,7 @@ module Truty
49
47
  #
50
48
  # @param input [String] The paragraph which will be converted.
51
49
  # @return [String] Paragraph with corrected emdashes.
52
- def emdash_spaces(input)
50
+ def emdash(input)
53
51
  input.gsub(/\s+(—|-{2,3})\s+/, " — ")
54
52
  end
55
53
 
@@ -57,7 +55,7 @@ module Truty
57
55
  #
58
56
  # @param input [String] The paragraph which will be converted.
59
57
  # @return [String] Paragraph with corrected endashes.
60
- def endash_spaces(input)
58
+ def endash(input)
61
59
  input.gsub(/\s+(–|-)\s+/, " – ")
62
60
  end
63
61
 
@@ -69,7 +67,7 @@ module Truty
69
67
  # @param right [Integer] Number of characters on the beginning of the words which cannot be hyphenated.
70
68
  # @param char [String] The character which will be added to hyphenation places.
71
69
  # @return [String] Paragraph with added hyphenation characters.
72
- def add_soft_hyphens(input, lang = "en_us", left = 2, right = 2, char = "­")
70
+ def soft_hyphens(input, lang = "en_us", left = 2, right = 2, char = "­")
73
71
  l = Text::Hyphen.new(:language => lang, :left => left, :right => right)
74
72
  words = input.split(/[ ]+/m)
75
73
  result = []
@@ -89,32 +87,16 @@ module Truty
89
87
  # @param start_quotes [String] The character used for starting quotes.
90
88
  # @param end_quotes [String] The character used for ending quotes.
91
89
  # @return [String] Paragraph with correct double quotes.
92
- def fix_quotes(input, type = '"', start_quotes = "“", end_quotes = "”")
90
+ def quotes(input, type = '"', start_quotes = "“", end_quotes = "”")
93
91
  regexp = Regexp.new(type + '[^' + type + ']*' + type)
94
92
  input.gsub(regexp) { |s| start_quotes + s[1..-2].strip + end_quotes }
95
93
  end
96
94
 
97
- # Converts single quotes to the typograhic ones.
98
- #
99
- # @param input [String] The paragraph which will be converted.
100
- # @return [String] Paragraph with correct single quotes.
101
- def fix_single_quotes(input)
102
- fix_quotes(input, "'", "‘", "’")
103
- end
104
-
105
- # Converts double quotes to the typograhic ones.
106
- #
107
- # @param input [String] The paragraph which will be converted.
108
- # @return [String] Paragraph with correct double quotes.
109
- def fix_double_quotes(input)
110
- fix_quotes(input, '"', "“", "”")
111
- end
112
-
113
95
  # Adds multiplication sign between numbers instead of X.
114
96
  #
115
97
  # @param input [String] The paragraph which will be converted.
116
98
  # @return [String] Paragraph with correct multiplication signs.
117
- def fix_multiplication_sign(input)
99
+ def multiplication_sign(input)
118
100
  output = input.gsub(/(\d+)\s{0,1}[Xx]\s{0,1}(\d+)/, '\1 × \2')
119
101
  output = output.gsub(/(\d+)[Xx]/, '\1×')
120
102
  end
@@ -123,7 +105,7 @@ module Truty
123
105
  #
124
106
  # @param input [String] The paragraph which will be converted.
125
107
  # @return [String] Paragraph with correct spaces between numbers.
126
- def fix_space_between_numbers(input)
108
+ def space_between_numbers(input)
127
109
  input.gsub(/(\d)\s+(\d)/, '\1 \2')
128
110
  end
129
111
 
@@ -131,7 +113,7 @@ module Truty
131
113
  #
132
114
  # @param input [String] The paragraph which will be converted.
133
115
  # @return [String] Paragraph with correct spaces around brackets.
134
- def fix_brackets_whitespace(input)
116
+ def brackets_whitespace(input)
135
117
  output = input.gsub(/([\(\[\{])\s*/, '\1')
136
118
  output = output.gsub(/\s*([\]\)\}])/, '\1')
137
119
  output = output.gsub(/\s+([\(\[\{])\s*/, ' \1')
@@ -142,7 +124,7 @@ module Truty
142
124
  #
143
125
  # @param input [String] The paragraph which will be converted.
144
126
  # @return [String] Paragraph with converted characters.
145
- def fix_multicharacters(input)
127
+ def multicharacters(input)
146
128
  output = input.gsub(/\([Cc]\)/, "©")
147
129
  output = output.gsub(/\([Pp]\)/, "℗")
148
130
  output = output.gsub(/\([Rr]\)/, "®")
@@ -159,7 +141,7 @@ module Truty
159
141
  #
160
142
  # @param input [String] The paragraph which will be converted.
161
143
  # @return [String] Paragraph with correct spaces around punctuation.
162
- def fix_punctuation_whitespace(input)
144
+ def punctuation_whitespace(input)
163
145
  input.gsub(/\s*([\!\?\.,;:…]+)\s*/, '\1 ')
164
146
  end
165
147
 
@@ -167,7 +149,7 @@ module Truty
167
149
  #
168
150
  # @param input [String] The paragraph which will be converted.
169
151
  # @return [String] Paragraph with correct spaces between number and unit.
170
- def fix_units(input)
152
+ def units(input)
171
153
  output = input.gsub(/(\d+)\s+(%|‰|‱|℃|℉|°|€|Kč|(Y|Z|E|P|T|G|M|k|h|da|d|m|µ|n|p|f|a|z|y)?(m(²|³)?|g|s|h|A|K|cd|mol|Ω|℃|℉))/, '\1 \2')
172
154
  output.gsub(/(\*|§|#|†)\s+(\d+)/, '\1 \2')
173
155
  end
@@ -176,7 +158,7 @@ module Truty
176
158
  #
177
159
  # @param input [String] The paragraph which will be converted.
178
160
  # @return [String] Paragraph with removed widows.
179
- def fix_widows(input)
161
+ def widows(input)
180
162
  input.gsub(/(\s)(\S+(\$|\z))/, ' \2')
181
163
  end
182
164
 
@@ -184,7 +166,7 @@ module Truty
184
166
  #
185
167
  # @param input [String] The paragraph which will be converted.
186
168
  # @return [String] Paragraph without trailing spaces.
187
- def fix_trailing_spaces(input)
169
+ def trailing_spaces(input)
188
170
  input.gsub(/\s*($|\z)/, '')
189
171
  end
190
172
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: truty
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matěj Kašpar Jirásek
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-04 00:00:00.000000000 Z
11
+ date: 2015-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: text-hyphen
@@ -66,9 +66,10 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0.8'
69
- description: A string converter which aims to correct the typography.
69
+ description: A string converter aiming to correct the typography of plain text.
70
70
  email: matej.jirasek@me.com
71
- executables: []
71
+ executables:
72
+ - truty
72
73
  extensions: []
73
74
  extra_rdoc_files: []
74
75
  files:
@@ -77,6 +78,8 @@ files:
77
78
  - bin/truty
78
79
  - lib/truty.rb
79
80
  - lib/truty/czech.rb
81
+ - lib/truty/english.rb
82
+ - lib/truty/french.rb
80
83
  - lib/truty/general.rb
81
84
  homepage: https://github.com/mkj-is/Truty
82
85
  licenses: