RubyGems - truty - Versions diffs - 0.1.1 → 0.2.0 - Mend

truty 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 1f40bb7fec5fbcc791d421f3815710ffea30736b
-  data.tar.gz: a64fddfb5f0fec224737008005e4f3657b09c22a
+  metadata.gz: 4feee35354a4825efaf7b001e6e75d5da250d16d
+  data.tar.gz: b026eb59f352ba1eb611a19eac14d87b4b2d426f
 SHA512:
-  metadata.gz: 14476727de9625d8872b21ab3134d441b662da6a4c9010d7756ac17ade34d500125b840a9273fda4c2971067c1f6990e115d8af1222760c36a7e43f73a2ea5c8
-  data.tar.gz: bd59bad1b93812342cb70fba9435b2164cf10acbdaff7b6387c89a1a71606e4d67255770472d001e20bbdecc21e2767628867664dbf7f0847a07e5bef7c0c44a
+  metadata.gz: 362609e41ac202bcfb3f46be87e9b6caeb8f6c82b424086feca76222b54bfb9aa60690fd5c0a27909feb58a7f26fb3bd468a72608af6b31125ef4500f0200c49
+  data.tar.gz: 100ae5a99c6a5552a8c99aec4444e5dfdd820fba072066b0105675db41003fee0ced281181f82fe0459e71926384a45715f10769996c2c611c76ba1fe484faa1

data/bin/truty CHANGED

@@ -3,7 +3,12 @@
 require "truty"
 def main
-  puts Truty.fix_czech_text(ARGF.read)
+  language = :general
+  if ARGV[0] == "-l" || ARGV[0] == "--language" then
+    language = ARGV[1]
+    ARGV.shift(2)
+  end
+  puts Truty.send :fix, ARGF.read, language
 end
 main

data/lib/truty.rb CHANGED

@@ -3,6 +3,8 @@
 require 'uri'
 require 'text/hyphen'
 require 'truty/general'
+require 'truty/english'
+require 'truty/french'
 require 'truty/czech'
 # A Ruby library which is a simple string converter, which aims to fix all the typography imperfections of the plain text.
@@ -10,7 +12,7 @@ require 'truty/czech'
 module Truty
   extend General
-  extend Czech
+  extend English, French, Czech
 end

data/lib/truty/czech.rb CHANGED

@@ -5,52 +5,33 @@ module Truty
   # @author Matěj Kašpar Jirásek
   module Czech
-    # Improves the typography of the large plain text with paragraphs. Adds non-breaking spaces, hyphenation, fixes dashes, etc. Fixes some typography fixes specific for the Czech languages, like one character prepositions, abbreviations and spaces between numbers.
-    #
-    # @param input [String] The text which will be converted.
-    # @return [String] Text with improved typography.
-    def fix_czech_text(input)
-      input.split("\n").collect { |p| fix_czech_paragraph(p) }.join("\n")
-    end
-    # Improves the Czech typography of single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {#fix_czech_text}.
+    # Improves the Czech typography of single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {General#fix}.
     #
     # @param input [String] The paragraph which will be converted.
     # @return [String] Paragraph with improved typography.
-    def fix_czech_paragraph(input)
-      output = input
-      output = ellipsis(output)
-      output = fix_multicharacters(output)
-      output = fix_punctuation_whitespace(output)
-      output = fix_brackets_whitespace(output)
-      output = add_soft_hyphens(output, "cs")
-      output = emdash_spaces(output)
-      output = endash_spaces(output)
-      output = fix_quotes(output, "\"", "„", "“")
-      output = fix_quotes(output, "'", "‚", "‘")
-      output = fix_multiplication_sign(output)
-      output = fix_space_between_numbers(output)
-      output = fix_units(output)
-      output = fix_trailing_spaces(output)
-      output = fix_widows(output)
-      output = fix_long_czech_numbers(output)
-      output = fix_czech_one_character_words(output)
-      output = fix_czech_abbreviations(output)
+    def czech(input)
+      input = soft_hyphens(input, "cs")
+      input = general(input)
+      input = czech_double_quotes(input)
+      input = czech_single_quotes(input)
+      input = czech_long_numbers(input)
+      input = czech_prepositions(input)
+      input = czech_abbreviations(input)
     end
     # Adds a non-breaking space after Czech one-character prepositions.
     #
     # @param input [String] The paragraph which will be converted.
     # @return [String] Paragraph with non-breaking spaces after prepositions.
-    def fix_czech_one_character_words(input)
-      input.gsub(/(\s+|^|\A)(([aikosuvz]\s+)+)/i) { |prep| $1 + $2.gsub(/\s+/, " ") }
+    def czech_prepositions(input)
+      input.gsub(/(\s+|^|\A)(([aikosuvz]\s+)+)/i) { $1 + $2.gsub(/\s+/, " ") }
     end
     # Divides long numbers into parts of three digits using thin space.
     #
     # @param input [String] The paragraph which will be converted.
     # @return [String] Paragraph with spaces inside of long numbers.
-    def fix_long_czech_numbers(input)
+    def czech_long_numbers(input)
       input.gsub(/\d+/) { |n| n.reverse.scan(/(.{1,3})/).join(' ').reverse }
     end
@@ -58,10 +39,26 @@ module Truty
     #
     # @param input [String] The paragraph which will be converted.
     # @return [String] Paragraph with non-breaking spaces in and after abbreviations.
-    def fix_czech_abbreviations(input)
+    def czech_abbreviations(input)
       abbreviations = /(a. s.|abl. |absol. |adj. |adm. |adv. |aj.|ak. |ak. sl.|akt. |alch. |amer. |anat. |angl. |anglosas. |ap.|apod.|arab. |arch. |archit. |arg. |arm. gen. |astr. |astrol. |atd.|atp.|att. |b. k.|Bc. |BcA. |belg. |bibl. |biol. |bl. |boh. |bot. |br. |brig. gen. |brit. |bulh. |bás. |býv. |chcsl. |chem. |chil. |CSc. |csl. |círk. |dat. |dep. |des. |dial. |DiS.|dl. |doc. |dol. |dop. |dopr. |dosl. |dán. |dór. |děj. |dět. |ekon. |epic. |etnonym. |eufem. |ev. |event. |f. |fam. |fem. |fil. |film. |fin. |form. |fot. |fr. |fut. |fyz. |gen. |genmjr. |genplk. |genpor. |geogr. |geol. |geom. |germ. |gram. |hebr. |herald. |hist. |hl. |hod. |hor. |horn. |hovor. |hud. |hut. |ie. |imp. |impf. |ind. |indoevr. |inf. |Ing. |instr. |interj. |iron. |it. |ión. |j. č.|jap. |JUDr. |k. s.|kanad. |katalán. |klas. |kniž. |komp. |konj. |konkr. |kpt. |kr. |kuch. |kř. |lat. |les. |lid. |lit. |liturg. |log. |lok. |lék. |m. |mat. |meteor. |metr. |MgA. |Mgr. |mil. |mj. |mjr. |ml. |mld. |mn. č.|mod. |ms. |MUDr. |MVDr. |mysl. |n. |n. l.|např. |neklas. |nesklon. |než. |niz. |nom. |nor. |npor. |nprap. |nrtm. |nstržm. |náb. |nám. |námoř. |něm. |o. p. s.|o. s.|ob. |obch. |obyč. |odd. |odp. |ojed. |opt. |p. |p. n. l.|p. o.|P. S. |P. T. |part. |pas. |pejor. |pers. |pf. |PharmDr. |PhDr. |pl. |plk. |plpf. |po Kr.|pol. |pomn. |popř. |por. |pplk. |ppor. |pprap. |prap. |prep. |prof. |práv. |př. Kr.|př. n. l.|před n. l.|předl. |přivl. |r. |rak. |rcsl. |refl. |reg. |resp. |rkp. |RNDr. |roč. |RSDr. |rtm. |rtn. |rum. |rus. |s. |s. p.|s. r. o.|samohl. |Sb. |sg. |sl. |slang. |slov. |souhl. |spec. |spol. s r. o.|sport. |srov. |st. |stfr. |stol. |str. |stržm. |stsl. |střv. |subj. |subst. |superl. |sv. |svob. |sz. |t. r.|tech. |telev. |teol. |ThDr. |tis. |tj. |trans. |tur. |typogr. |tzn. |tzv. |táz. |v z.|v. o. s.|v. r.|v. v. i.|var. |vedl. |verb. |vl. jm. |voj. |vok. |vulg. |vztaž. |výtv. |vč. |vůb. |z. s.|zahr. |zast. |zejm. |zeměd. |zkr. |zn. |zvl. |zájm. |zř. |č. |č. j.|č. p. |čas. |čes. |čet. |čj. 
|čp. |čín. |čís. |ř. |řec. |říj. |škpt. |špan. |šprap. |št. prap. |švýc. )/i
       input.gsub(abbreviations) { |abbr| abbr.gsub(/ /, ' ') }
     end
+    # Converts single quotes to the typographic ones.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with correct single quotes.
+    def czech_single_quotes(input)
+      quotes(input, "'", "‚", "‘")
+    end
+    # Converts double quotes to the typographic ones.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with correct double quotes.
+    def czech_double_quotes(input)
+      quotes(input, "\"", "„", "”")
+    end
   end
 end

data/lib/truty/english.rb ADDED

@@ -0,0 +1,37 @@
+module Truty
+  # Module with specific English typography fixes.
+  # @author Matěj Kašpar Jirásek
+  module English
+    # Improves the English typography of single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {General#fix}.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @param country [String] The country ("uk" or "us").
+    # @return [String] Paragraph with improved typography.
+    def english(input, country = "us")
+      input = soft_hyphens(input, "en_" + country)
+      input = general(input)
+      input = english_double_quotes(input)
+      input = english_single_quotes(input)
+    end
+    # Converts single quotes to the typographic ones.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with correct single quotes.
+    def english_single_quotes(input)
+      quotes(input, "'", "‘", "’")
+    end
+    # Converts double quotes to the typographic ones.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with correct double quotes.
+    def english_double_quotes(input)
+      quotes(input)
+    end
+  end
+end

data/lib/truty/french.rb ADDED

@@ -0,0 +1,36 @@
+module Truty
+  # Module with specific French typography fixes.
+  # @author Matěj Kašpar Jirásek
+  module French
+    # Improves the French typography of single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {General#fix}.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with improved typography.
+    def french(input)
+      input = soft_hyphens(input, "fr")
+      input = general(input)
+      input = french_double_quotes(input)
+      input = french_single_quotes(input)
+    end
+    # Converts single quotes to the typographic ones.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with correct single quotes.
+    def french_single_quotes(input)
+      quotes(input, "'", "‹ ", " ›")
+    end
+    # Converts double quotes to the typographic ones.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with correct double quotes.
+    def french_double_quotes(input)
+      quotes(input, "\"", "« ", " »")
+    end
+  end
+end

data/lib/truty/general.rb CHANGED

@@ -8,33 +8,31 @@ module Truty
     # Improves the typography of the large plain text with paragraphs. Adds non-breaking spaces, hyphenation, fixes dashes, etc.
     #
     # @param input [String] The text which will be converted.
-    # @param lang [String] Sets the language of hyphenation. (See {#add_soft_hyphens}.)
+    # @param lang [Symbol, String] Sets the language by its English name (e.g. :czech, :english, :french); names Truty does not respond to fall back to :general.
     # @return [String] Text with improved typography.
-    def fix(input, lang = "en_us")
-      input.split("\n").collect { |p| fix_paragraph(p, lang) }.join("\n")
+    def fix(input, lang = :general)
+      if not Truty.respond_to? lang then
+        lang = :general
+      end
+      input.split("\n").collect { |p| Truty.send lang, p }.join("\n")
     end
-    # Improves the typography of single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {#fix}.
+    # Improves basic non-language specific issues in typography.
     #
     # @param input [String] The paragraph which will be converted.
-    # @param lang [String] Sets the language of hyphenation. (See {#add_soft_hyphens}.)
     # @return [String] Paragraph with improved typography.
-    def fix_paragraph(input, lang = "en_us")
-      output = input
-      output = ellipsis(output)
-      output = fix_multicharacters(output)
-      output = fix_punctuation_whitespace(output)
-      output = fix_brackets_whitespace(output)
-      output = add_soft_hyphens(output, lang)
-      output = emdash_spaces(output)
-      output = endash_spaces(output)
-      output = fix_double_quotes(output)
-      output = fix_single_quotes(output)
-      output = fix_multiplication_sign(output)
-      output = fix_space_between_numbers(output)
-      output = fix_units(output)
-      output = fix_trailing_spaces(output)
-      output = fix_widows(output)
+    def general(input)
+      input = ellipsis(input)
+      input = multicharacters(input)
+      input = punctuation_whitespace(input)
+      input = brackets_whitespace(input)
+      input = emdash(input)
+      input = endash(input)
+      input = multiplication_sign(input)
+      input = space_between_numbers(input)
+      input = units(input)
+      input = trailing_spaces(input)
+      input = widows(input)
     end
     # Converts three or more periods (dots, points) into ellipsis.
@@ -49,7 +47,7 @@ module Truty
     #
     # @param input [String] The paragraph which will be converted.
     # @return [String] Paragraph with corrected emdashes.
-    def emdash_spaces(input)
+    def emdash(input)
       input.gsub(/\s+(—|-{2,3})\s+/, " — ")
     end
@@ -57,7 +55,7 @@ module Truty
     #
     # @param input [String] The paragraph which will be converted.
     # @return [String] Paragraph with corrected endashes.
-    def endash_spaces(input)
+    def endash(input)
       input.gsub(/\s+(–|-)\s+/, " – ")
     end
@@ -69,7 +67,7 @@ module Truty
     # @param right [Integer] Number of characters on the beginning of the words which cannot be hyphenated.
     # @param char [String] The character which will be added to hyphenation places.
     # @return [String] Paragraph with added hyphenation characters.
-    def add_soft_hyphens(input, lang = "en_us", left = 2, right = 2, char = "")
+    def soft_hyphens(input, lang = "en_us", left = 2, right = 2, char = "")
       l = Text::Hyphen.new(:language => lang, :left => left, :right => right)
       words = input.split(/[ ]+/m)
       result = []
@@ -89,32 +87,16 @@ module Truty
     # @param start_quotes [String] The character used for starting quotes.
     # @param end_quotes [String] The character used for ending quotes.
     # @return [String] Paragraph with correct double quotes.
-    def fix_quotes(input, type = '"', start_quotes = "“", end_quotes = "”")
+    def quotes(input, type = '"', start_quotes = "“", end_quotes = "”")
       regexp = Regexp.new(type + '[^' + type + ']*' + type)
       input.gsub(regexp) { |s| start_quotes + s[1..-2].strip + end_quotes }
     end
-    # Converts single quotes to the typograhic ones.
-    #
-    # @param input [String] The paragraph which will be converted.
-    # @return [String] Paragraph with correct single quotes.
-    def fix_single_quotes(input)
-      fix_quotes(input, "'", "‘", "’")
-    end
-    # Converts double quotes to the typograhic ones.
-    #
-    # @param input [String] The paragraph which will be converted.
-    # @return [String] Paragraph with correct double quotes.
-    def fix_double_quotes(input)
-      fix_quotes(input, '"', "“", "”")
-    end
     # Adds multiplication sign between numbers instead of X.
     #
     # @param input [String] The paragraph which will be converted.
     # @return [String] Paragraph with correct multiplication signs.
-    def fix_multiplication_sign(input)
+    def multiplication_sign(input)
       output = input.gsub(/(\d+)\s{0,1}[Xx]\s{0,1}(\d+)/, '\1 × \2')
       output = output.gsub(/(\d+)[Xx]/, '\1×')
     end
@@ -123,7 +105,7 @@ module Truty
     #
     # @param input [String] The paragraph which will be converted.
     # @return [String] Paragraph with correct spaces between numbers.
-    def fix_space_between_numbers(input)
+    def space_between_numbers(input)
       input.gsub(/(\d)\s+(\d)/, '\1 \2')
     end
@@ -131,7 +113,7 @@ module Truty
     #
     # @param input [String] The paragraph which will be converted.
     # @return [String] Paragraph with correct spaces around brackets.
-    def fix_brackets_whitespace(input)
+    def brackets_whitespace(input)
       output = input.gsub(/([\(\[\{])\s*/, '\1')
       output = output.gsub(/\s*([\]\)\}])/, '\1')
       output = output.gsub(/\s+([\(\[\{])\s*/, ' \1')
@@ -142,7 +124,7 @@ module Truty
     #
     # @param input [String] The paragraph which will be converted.
     # @return [String] Paragraph with converted characters.
-    def fix_multicharacters(input)
+    def multicharacters(input)
       output = input.gsub(/\([Cc]\)/, "©")
       output = output.gsub(/\([Pp]\)/, "℗")
       output = output.gsub(/\([Rr]\)/, "®")
@@ -159,7 +141,7 @@ module Truty
     #
     # @param input [String] The paragraph which will be converted.
     # @return [String] Paragraph with correct spaces around punctuation.
-    def fix_punctuation_whitespace(input)
+    def punctuation_whitespace(input)
       input.gsub(/\s*([\!\?\.,;:…]+)\s*/, '\1 ')
     end
@@ -167,7 +149,7 @@ module Truty
     #
     # @param input [String] The paragraph which will be converted.
     # @return [String] Paragraph with correct spaces between number and unit.
-    def fix_units(input)
+    def units(input)
       output = input.gsub(/(\d+)\s+(%|‰|‱|℃|℉|°|€|Kč|(Y|Z|E|P|T|G|M|k|h|da|d|m|µ|n|p|f|a|z|y)?(m(²|³)?|g|s|h|A|K|cd|mol|Ω|℃|℉))/, '\1 \2')
       output.gsub(/(\*|§|#|†)\s+(\d+)/, '\1 \2')
     end
@@ -176,7 +158,7 @@ module Truty
     #
     # @param input [String] The paragraph which will be converted.
     # @return [String] Paragraph with removed widows.
-    def fix_widows(input)
+    def widows(input)
       input.gsub(/(\s)(\S+(\$|\z))/, ' \2')
     end
@@ -184,7 +166,7 @@ module Truty
     #
     # @param input [String] The paragraph which will be converted.
     # @return [String] Paragraph without trailing spaces.
-    def fix_trailing_spaces(input)
+    def trailing_spaces(input)
       input.gsub(/\s*($|\z)/, '')
     end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: truty
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.2.0
 platform: ruby
 authors:
 - Matěj Kašpar Jirásek
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-01-04 00:00:00.000000000 Z
+date: 2015-01-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: text-hyphen
@@ -66,9 +66,10 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '0.8'
-description: A string converter which aims to correct the typography.
+description: A string converter aiming to correct the typography of plain text.
 email: matej.jirasek@me.com
-executables: []
+executables:
+- truty
 extensions: []
 extra_rdoc_files: []
 files:
@@ -77,6 +78,8 @@ files:
 - bin/truty
 - lib/truty.rb
 - lib/truty/czech.rb
+- lib/truty/english.rb
+- lib/truty/french.rb
 - lib/truty/general.rb
 homepage: https://github.com/mkj-is/Truty
 licenses: