truty 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1f40bb7fec5fbcc791d421f3815710ffea30736b
4
- data.tar.gz: a64fddfb5f0fec224737008005e4f3657b09c22a
3
+ metadata.gz: 4feee35354a4825efaf7b001e6e75d5da250d16d
4
+ data.tar.gz: b026eb59f352ba1eb611a19eac14d87b4b2d426f
5
5
  SHA512:
6
- metadata.gz: 14476727de9625d8872b21ab3134d441b662da6a4c9010d7756ac17ade34d500125b840a9273fda4c2971067c1f6990e115d8af1222760c36a7e43f73a2ea5c8
7
- data.tar.gz: bd59bad1b93812342cb70fba9435b2164cf10acbdaff7b6387c89a1a71606e4d67255770472d001e20bbdecc21e2767628867664dbf7f0847a07e5bef7c0c44a
6
+ metadata.gz: 362609e41ac202bcfb3f46be87e9b6caeb8f6c82b424086feca76222b54bfb9aa60690fd5c0a27909feb58a7f26fb3bd468a72608af6b31125ef4500f0200c49
7
+ data.tar.gz: 100ae5a99c6a5552a8c99aec4444e5dfdd820fba072066b0105675db41003fee0ced281181f82fe0459e71926384a45715f10769996c2c611c76ba1fe484faa1
data/bin/truty CHANGED
@@ -3,7 +3,12 @@
3
3
  require "truty"
4
4
 
5
5
  def main
6
- puts Truty.fix_czech_text(ARGF.read)
6
+ language = :general
7
+ if ARGV[0] == "-l" || ARGV[0] == "--language" then
8
+ language = ARGV[1]
9
+ ARGV.shift(2)
10
+ end
11
+ puts Truty.send :fix, ARGF.read, language
7
12
  end
8
13
 
9
14
  main
@@ -3,6 +3,8 @@
3
3
  require 'uri'
4
4
  require 'text/hyphen'
5
5
  require 'truty/general'
6
+ require 'truty/english'
7
+ require 'truty/french'
6
8
  require 'truty/czech'
7
9
 
8
10
  # A Ruby library which is a simple string converter, which aims to fix all the typography imperfections of the plain text.
@@ -10,7 +12,7 @@ require 'truty/czech'
10
12
  module Truty
11
13
 
12
14
  extend General
13
- extend Czech
15
+ extend English, French, Czech
14
16
 
15
17
  end
16
18
 
@@ -5,52 +5,33 @@ module Truty
5
5
  # @author Matěj Kašpar Jirásek
6
6
  module Czech
7
7
 
8
- # Improves the typography of the large plain text with paragraphs. Adds non-breaking spaces, hyphenation, fixes dashes, etc. Fixes some typography fixes specific for the Czech languages, like one character prepositions, abbreviations and spaces between numbers.
9
- #
10
- # @param input [String] The text which will be converted.
11
- # @return [String] Text with improved typography.
12
- def fix_czech_text(input)
13
- input.split("\n").collect { |p| fix_czech_paragraph(p) }.join("\n")
14
- end
15
-
16
- # Improves the Czech typography of single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {#fix_czech_text}.
8
+ # Improves the Czech typography of a single paragraph. If you supply more than one paragraph, you might lose some improvements such as the widow fix. For improving longer text see {General#fix}.
17
9
  #
18
10
  # @param input [String] The paragraph which will be converted.
19
11
  # @return [String] Paragraph with improved typography.
20
- def fix_czech_paragraph(input)
21
- output = input
22
- output = ellipsis(output)
23
- output = fix_multicharacters(output)
24
- output = fix_punctuation_whitespace(output)
25
- output = fix_brackets_whitespace(output)
26
- output = add_soft_hyphens(output, "cs")
27
- output = emdash_spaces(output)
28
- output = endash_spaces(output)
29
- output = fix_quotes(output, "\"", "„", "“")
30
- output = fix_quotes(output, "'", "‚", "‘")
31
- output = fix_multiplication_sign(output)
32
- output = fix_space_between_numbers(output)
33
- output = fix_units(output)
34
- output = fix_trailing_spaces(output)
35
- output = fix_widows(output)
36
- output = fix_long_czech_numbers(output)
37
- output = fix_czech_one_character_words(output)
38
- output = fix_czech_abbreviations(output)
12
+ def czech(input)
13
+ input = soft_hyphens(input, "cs")
14
+ input = general(input)
15
+ input = czech_double_quotes(input)
16
+ input = czech_single_quotes(input)
17
+ input = czech_long_numbers(input)
18
+ input = czech_prepositions(input)
19
+ input = czech_abbreviations(input)
39
20
  end
40
21
 
41
22
  # Adds a non-breaking space after Czech one-character prepositions.
42
23
  #
43
24
  # @param input [String] The paragraph which will be converted.
44
25
  # @return [String] Paragraph with non-breaking spaces after prepositions.
45
- def fix_czech_one_character_words(input)
46
- input.gsub(/(\s+|^|\A)(([aikosuvz]\s+)+)/i) { |prep| $1 + $2.gsub(/\s+/, " ") }
26
+ def czech_prepositions(input)
27
+ input.gsub(/(\s+|^|\A)(([aikosuvz]\s+)+)/i) { $1 + $2.gsub(/\s+/, " ") }
47
28
  end
48
29
 
49
30
  # Divides long numbers into parts of three digits using thin space.
50
31
  #
51
32
  # @param input [String] The paragraph which will be converted.
52
33
  # @return [String] Paragraph with spaces inside of long numbers.
53
- def fix_long_czech_numbers(input)
34
+ def czech_long_numbers(input)
54
35
  input.gsub(/\d+/) { |n| n.reverse.scan(/(.{1,3})/).join(' ').reverse }
55
36
  end
56
37
 
@@ -58,10 +39,26 @@ module Truty
58
39
  #
59
40
  # @param input [String] The paragraph which will be converted.
60
41
  # @return [String] Paragraph with non-breaking spaces in and after abbreviations.
61
- def fix_czech_abbreviations(input)
42
+ def czech_abbreviations(input)
62
43
  abbreviations = /(a. s.|abl. |absol. |adj. |adm. |adv. |aj.|ak. |ak. sl.|akt. |alch. |amer. |anat. |angl. |anglosas. |ap.|apod.|arab. |arch. |archit. |arg. |arm. gen. |astr. |astrol. |atd.|atp.|att. |b. k.|Bc. |BcA. |belg. |bibl. |biol. |bl. |boh. |bot. |br. |brig. gen. |brit. |bulh. |bás. |býv. |chcsl. |chem. |chil. |CSc. |csl. |círk. |dat. |dep. |des. |dial. |DiS.|dl. |doc. |dol. |dop. |dopr. |dosl. |dán. |dór. |děj. |dět. |ekon. |epic. |etnonym. |eufem. |ev. |event. |f. |fam. |fem. |fil. |film. |fin. |form. |fot. |fr. |fut. |fyz. |gen. |genmjr. |genplk. |genpor. |geogr. |geol. |geom. |germ. |gram. |hebr. |herald. |hist. |hl. |hod. |hor. |horn. |hovor. |hud. |hut. |ie. |imp. |impf. |ind. |indoevr. |inf. |Ing. |instr. |interj. |iron. |it. |ión. |j. č.|jap. |JUDr. |k. s.|kanad. |katalán. |klas. |kniž. |komp. |konj. |konkr. |kpt. |kr. |kuch. |kř. |lat. |les. |lid. |lit. |liturg. |log. |lok. |lék. |m. |mat. |meteor. |metr. |MgA. |Mgr. |mil. |mj. |mjr. |ml. |mld. |mn. č.|mod. |ms. |MUDr. |MVDr. |mysl. |n. |n. l.|např. |neklas. |nesklon. |než. |niz. |nom. |nor. |npor. |nprap. |nrtm. |nstržm. |náb. |nám. |námoř. |něm. |o. p. s.|o. s.|ob. |obch. |obyč. |odd. |odp. |ojed. |opt. |p. |p. n. l.|p. o.|P. S. |P. T. |part. |pas. |pejor. |pers. |pf. |PharmDr. |PhDr. |pl. |plk. |plpf. |po Kr.|pol. |pomn. |popř. |por. |pplk. |ppor. |pprap. |prap. |prep. |prof. |práv. |př. Kr.|př. n. l.|před n. l.|předl. |přivl. |r. |rak. |rcsl. |refl. |reg. |resp. |rkp. |RNDr. |roč. |RSDr. |rtm. |rtn. |rum. |rus. |s. |s. p.|s. r. o.|samohl. |Sb. |sg. |sl. |slang. |slov. |souhl. |spec. |spol. s r. o.|sport. |srov. |st. |stfr. |stol. |str. |stržm. |stsl. |střv. |subj. |subst. |superl. |sv. |svob. |sz. |t. r.|tech. |telev. |teol. |ThDr. |tis. |tj. |trans. |tur. |typogr. |tzn. |tzv. |táz. |v z.|v. o. s.|v. r.|v. v. i.|var. |vedl. |verb. |vl. jm. |voj. |vok. |vulg. |vztaž. |výtv. |vč. |vůb. |z. s.|zahr. |zast. |zejm. |zeměd. |zkr. |zn. |zvl. |zájm. |zř. |č. |č. j.|č. p. |čas. |čes. |čet. |čj. |čp. 
|čín. |čís. |ř. |řec. |říj. |škpt. |špan. |šprap. |št. prap. |švýc. )/i
63
44
  input.gsub(abbreviations) { |abbr| abbr.gsub(/ /, ' ') }
64
45
  end
65
46
 
47
+ # Converts single quotes to the typographic ones.
48
+ #
49
+ # @param input [String] The paragraph which will be converted.
50
+ # @return [String] Paragraph with correct single quotes.
51
+ def czech_single_quotes(input)
52
+ quotes(input, "'", "‚", "‘")
53
+ end
54
+
55
+ # Converts double quotes to the typographic ones.
56
+ #
57
+ # @param input [String] The paragraph which will be converted.
58
+ # @return [String] Paragraph with correct double quotes.
59
+ def czech_double_quotes(input)
60
+ quotes(input, "\"", "„", "”")
61
+ end
62
+
66
63
  end
67
64
  end
@@ -0,0 +1,37 @@
1
+
2
+ module Truty
3
+
4
+ # Module with specific English typography fixes.
5
+ # @author Matěj Kašpar Jirásek
6
+ module English
7
+
8
+ # Improves the English typography of a single paragraph. If you supply more than one paragraph, you might lose some improvements such as the widow fix. For improving longer text see {General#fix}.
9
+ #
10
+ # @param input [String] The paragraph which will be converted.
11
+ # @param country [String] The country ("uk" or "us").
12
+ # @return [String] Paragraph with improved typography.
13
+ def english(input, country = "us")
14
+ input = soft_hyphens(input, "en_" + country)
15
+ input = general(input)
16
+ input = english_double_quotes(input)
17
+ input = english_single_quotes(input)
18
+ end
19
+
20
+ # Converts single quotes to the typographic ones.
21
+ #
22
+ # @param input [String] The paragraph which will be converted.
23
+ # @return [String] Paragraph with correct single quotes.
24
+ def english_single_quotes(input)
25
+ quotes(input, "'", "‘", "’")
26
+ end
27
+
28
+ # Converts double quotes to the typographic ones.
29
+ #
30
+ # @param input [String] The paragraph which will be converted.
31
+ # @return [String] Paragraph with correct double quotes.
32
+ def english_double_quotes(input)
33
+ quotes(input)
34
+ end
35
+
36
+ end
37
+ end
@@ -0,0 +1,36 @@
1
+
2
+ module Truty
3
+
4
+ # Module with specific French typography fixes.
5
+ # @author Matěj Kašpar Jirásek
6
+ module French
7
+
8
+ # Improves the French typography of a single paragraph. If you supply more than one paragraph, you might lose some improvements such as the widow fix. For improving longer text see {General#fix}.
9
+ #
10
+ # @param input [String] The paragraph which will be converted.
11
+ # @return [String] Paragraph with improved typography.
12
+ def french(input)
13
+ input = soft_hyphens(input, "fr")
14
+ input = general(input)
15
+ input = french_double_quotes(input)
16
+ input = french_single_quotes(input)
17
+ end
18
+
19
+ # Converts single quotes to the typographic ones.
20
+ #
21
+ # @param input [String] The paragraph which will be converted.
22
+ # @return [String] Paragraph with correct single quotes.
23
+ def french_single_quotes(input)
24
+ quotes(input, "'", "‹ ", " ›")
25
+ end
26
+
27
+ # Converts double quotes to the typographic ones.
28
+ #
29
+ # @param input [String] The paragraph which will be converted.
30
+ # @return [String] Paragraph with correct double quotes.
31
+ def french_double_quotes(input)
32
+ quotes(input, "\"", "« ", " »")
33
+ end
34
+
35
+ end
36
+ end
@@ -8,33 +8,31 @@ module Truty
8
8
  # Improves the typography of the large plain text with paragraphs. Adds non-breaking spaces, hyphenation, fixes dashes, etc.
9
9
  #
10
10
  # @param input [String] The text which will be converted.
11
- # @param lang [String] Sets the language of hyphenation. (See {#add_soft_hyphens}.)
11
+ # @param lang [Symbol] Sets the language (English name such as :czech, :english or :french; names Truty does not respond to fall back to :general).
12
12
  # @return [String] Text with improved typography.
13
- def fix(input, lang = "en_us")
14
- input.split("\n").collect { |p| fix_paragraph(p, lang) }.join("\n")
13
+ def fix(input, lang = :general)
14
+ if not Truty.respond_to? lang then
15
+ lang = :general
16
+ end
17
+ input.split("\n").collect { |p| Truty.send lang, p }.join("\n")
15
18
  end
16
19
 
17
- # Improves the typography of single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {#fix}.
20
+ # Improves basic non-language specific issues in typography.
18
21
  #
19
22
  # @param input [String] The paragraph which will be converted.
20
- # @param lang [String] Sets the language of hyphenation. (See {#add_soft_hyphens}.)
21
23
  # @return [String] Paragraph with improved typography.
22
- def fix_paragraph(input, lang = "en_us")
23
- output = input
24
- output = ellipsis(output)
25
- output = fix_multicharacters(output)
26
- output = fix_punctuation_whitespace(output)
27
- output = fix_brackets_whitespace(output)
28
- output = add_soft_hyphens(output, lang)
29
- output = emdash_spaces(output)
30
- output = endash_spaces(output)
31
- output = fix_double_quotes(output)
32
- output = fix_single_quotes(output)
33
- output = fix_multiplication_sign(output)
34
- output = fix_space_between_numbers(output)
35
- output = fix_units(output)
36
- output = fix_trailing_spaces(output)
37
- output = fix_widows(output)
24
+ def general(input)
25
+ input = ellipsis(input)
26
+ input = multicharacters(input)
27
+ input = punctuation_whitespace(input)
28
+ input = brackets_whitespace(input)
29
+ input = emdash(input)
30
+ input = endash(input)
31
+ input = multiplication_sign(input)
32
+ input = space_between_numbers(input)
33
+ input = units(input)
34
+ input = trailing_spaces(input)
35
+ input = widows(input)
38
36
  end
39
37
 
40
38
  # Converts three or more periods (dots, points) into ellipsis.
@@ -49,7 +47,7 @@ module Truty
49
47
  #
50
48
  # @param input [String] The paragraph which will be converted.
51
49
  # @return [String] Paragraph with corrected emdashes.
52
- def emdash_spaces(input)
50
+ def emdash(input)
53
51
  input.gsub(/\s+(—|-{2,3})\s+/, " — ")
54
52
  end
55
53
 
@@ -57,7 +55,7 @@ module Truty
57
55
  #
58
56
  # @param input [String] The paragraph which will be converted.
59
57
  # @return [String] Paragraph with corrected endashes.
60
- def endash_spaces(input)
58
+ def endash(input)
61
59
  input.gsub(/\s+(–|-)\s+/, " – ")
62
60
  end
63
61
 
@@ -69,7 +67,7 @@ module Truty
69
67
  # @param right [Integer] Number of characters on the beginning of the words which cannot be hyphenated.
70
68
  # @param char [String] The character which will be added to hyphenation places.
71
69
  # @return [String] Paragraph with added hyphenation characters.
72
- def add_soft_hyphens(input, lang = "en_us", left = 2, right = 2, char = "­")
70
+ def soft_hyphens(input, lang = "en_us", left = 2, right = 2, char = "­")
73
71
  l = Text::Hyphen.new(:language => lang, :left => left, :right => right)
74
72
  words = input.split(/[ ]+/m)
75
73
  result = []
@@ -89,32 +87,16 @@ module Truty
89
87
  # @param start_quotes [String] The character used for starting quotes.
90
88
  # @param end_quotes [String] The character used for ending quotes.
91
89
  # @return [String] Paragraph with correct double quotes.
92
- def fix_quotes(input, type = '"', start_quotes = "“", end_quotes = "”")
90
+ def quotes(input, type = '"', start_quotes = "“", end_quotes = "”")
93
91
  regexp = Regexp.new(type + '[^' + type + ']*' + type)
94
92
  input.gsub(regexp) { |s| start_quotes + s[1..-2].strip + end_quotes }
95
93
  end
96
94
 
97
- # Converts single quotes to the typograhic ones.
98
- #
99
- # @param input [String] The paragraph which will be converted.
100
- # @return [String] Paragraph with correct single quotes.
101
- def fix_single_quotes(input)
102
- fix_quotes(input, "'", "‘", "’")
103
- end
104
-
105
- # Converts double quotes to the typograhic ones.
106
- #
107
- # @param input [String] The paragraph which will be converted.
108
- # @return [String] Paragraph with correct double quotes.
109
- def fix_double_quotes(input)
110
- fix_quotes(input, '"', "“", "”")
111
- end
112
-
113
95
  # Adds multiplication sign between numbers instead of X.
114
96
  #
115
97
  # @param input [String] The paragraph which will be converted.
116
98
  # @return [String] Paragraph with correct multiplication signs.
117
- def fix_multiplication_sign(input)
99
+ def multiplication_sign(input)
118
100
  output = input.gsub(/(\d+)\s{0,1}[Xx]\s{0,1}(\d+)/, '\1 × \2')
119
101
  output = output.gsub(/(\d+)[Xx]/, '\1×')
120
102
  end
@@ -123,7 +105,7 @@ module Truty
123
105
  #
124
106
  # @param input [String] The paragraph which will be converted.
125
107
  # @return [String] Paragraph with correct spaces between numbers.
126
- def fix_space_between_numbers(input)
108
+ def space_between_numbers(input)
127
109
  input.gsub(/(\d)\s+(\d)/, '\1 \2')
128
110
  end
129
111
 
@@ -131,7 +113,7 @@ module Truty
131
113
  #
132
114
  # @param input [String] The paragraph which will be converted.
133
115
  # @return [String] Paragraph with correct spaces around brackets.
134
- def fix_brackets_whitespace(input)
116
+ def brackets_whitespace(input)
135
117
  output = input.gsub(/([\(\[\{])\s*/, '\1')
136
118
  output = output.gsub(/\s*([\]\)\}])/, '\1')
137
119
  output = output.gsub(/\s+([\(\[\{])\s*/, ' \1')
@@ -142,7 +124,7 @@ module Truty
142
124
  #
143
125
  # @param input [String] The paragraph which will be converted.
144
126
  # @return [String] Paragraph with converted characters.
145
- def fix_multicharacters(input)
127
+ def multicharacters(input)
146
128
  output = input.gsub(/\([Cc]\)/, "©")
147
129
  output = output.gsub(/\([Pp]\)/, "℗")
148
130
  output = output.gsub(/\([Rr]\)/, "®")
@@ -159,7 +141,7 @@ module Truty
159
141
  #
160
142
  # @param input [String] The paragraph which will be converted.
161
143
  # @return [String] Paragraph with correct spaces around punctuation.
162
- def fix_punctuation_whitespace(input)
144
+ def punctuation_whitespace(input)
163
145
  input.gsub(/\s*([\!\?\.,;:…]+)\s*/, '\1 ')
164
146
  end
165
147
 
@@ -167,7 +149,7 @@ module Truty
167
149
  #
168
150
  # @param input [String] The paragraph which will be converted.
169
151
  # @return [String] Paragraph with correct spaces between number and unit.
170
- def fix_units(input)
152
+ def units(input)
171
153
  output = input.gsub(/(\d+)\s+(%|‰|‱|℃|℉|°|€|Kč|(Y|Z|E|P|T|G|M|k|h|da|d|m|µ|n|p|f|a|z|y)?(m(²|³)?|g|s|h|A|K|cd|mol|Ω|℃|℉))/, '\1 \2')
172
154
  output.gsub(/(\*|§|#|†)\s+(\d+)/, '\1 \2')
173
155
  end
@@ -176,7 +158,7 @@ module Truty
176
158
  #
177
159
  # @param input [String] The paragraph which will be converted.
178
160
  # @return [String] Paragraph with removed widows.
179
- def fix_widows(input)
161
+ def widows(input)
180
162
  input.gsub(/(\s)(\S+(\$|\z))/, ' \2')
181
163
  end
182
164
 
@@ -184,7 +166,7 @@ module Truty
184
166
  #
185
167
  # @param input [String] The paragraph which will be converted.
186
168
  # @return [String] Paragraph without trailing spaces.
187
- def fix_trailing_spaces(input)
169
+ def trailing_spaces(input)
188
170
  input.gsub(/\s*($|\z)/, '')
189
171
  end
190
172
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: truty
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matěj Kašpar Jirásek
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-04 00:00:00.000000000 Z
11
+ date: 2015-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: text-hyphen
@@ -66,9 +66,10 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0.8'
69
- description: A string converter which aims to correct the typography.
69
+ description: A string converter aiming to correct the typography of plain text.
70
70
  email: matej.jirasek@me.com
71
- executables: []
71
+ executables:
72
+ - truty
72
73
  extensions: []
73
74
  extra_rdoc_files: []
74
75
  files:
@@ -77,6 +78,8 @@ files:
77
78
  - bin/truty
78
79
  - lib/truty.rb
79
80
  - lib/truty/czech.rb
81
+ - lib/truty/english.rb
82
+ - lib/truty/french.rb
80
83
  - lib/truty/general.rb
81
84
  homepage: https://github.com/mkj-is/Truty
82
85
  licenses: