RubyGems - truty - Versions diffs - 0.1.0 - Mend

truty 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: e33c09b42c8ceb567b09cf0a0577bb01b20dba5c
+  data.tar.gz: 08f1808d7826aeef5738b05af2081799d0606cd5
+SHA512:
+  metadata.gz: cb6bd314915bfa2ca1d9d467b39fdc647f0528820d255c73808a1389eddd5687ae3d72deb423bd2c1b2ecc06451d8e483b2067f53fa4db1ec7be3eaf559657d9
+  data.tar.gz: f10562c3e8a6215d74cfdf0308a02cfd2b281b724c4eb11116daacf7a0f84a80a7244df7c861b82f58ebadc854267a4791e4f03ed86f23dcbe3e6f4048fad8c3

data/LICENSE ADDED Viewed

@@ -0,0 +1,22 @@
+The MIT License (MIT)
+Copyright (c) 2014 Matěj Kašpar Jirásek
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,4 @@
+# Truty
+This is a ruby gem in development which is a simple string converter, which aims to fix all the typography imperfections of the plain text.

data/bin/truty ADDED Viewed

@@ -0,0 +1,9 @@
+#!/usr/bin/env ruby -rubygems
+require "truty"
+def main
+  puts Truty.fix_czech_text(ARGF.read)
+end
+main

data/lib/truty.rb ADDED Viewed

@@ -0,0 +1,16 @@
+#!/usr/bin/ruby
+require 'uri'
+require 'text/hyphen'
+require 'truty/general'
+require 'truty/czech'
+# Truty is a simple string converter that aims to fix the typographic imperfections of plain text.
+#
+# The module extends itself with {General} and {Czech}, so the fixers defined in
+# those modules are called directly on it, e.g. `Truty.fix_czech_text(text)`.
+# @author Matěj Kašpar Jirásek
+module Truty
+  extend General
+  extend Czech
+end

data/lib/truty/czech.rb ADDED Viewed

@@ -0,0 +1,67 @@
+module Truty
+  # Module with specific Czech typography fixes.
+  # @author Matěj Kašpar Jirásek
+  module Czech
+    # Improves the typography of the large plain text with paragraphs. Adds non-breaking spaces, hyphenation, fixes dashes, etc. Fixes some typography fixes specific for the Czech languages, like one character prepositions, abbreviations and spaces between numbers.
+    #
+    # @param input [String] The text which will be converted.
+    # @return [String] Text with improved typography.
+    def fix_czech_text(input)
+      input.split("\n").collect { |p| fix_czech_paragraph(p) }.join("\n")
+    end
+    # Improves the Czech typography of single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {#fix_czech_text}.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with improved typography.
+    def fix_czech_paragraph(input)
+      output = input
+      output = ellipsis(output)
+      output = fix_multicharacters(output)
+      output = fix_punctuation_whitespace(output)
+      output = fix_brackets_whitespace(output)
+      output = add_soft_hyphens(output, "cs")
+      output = emdash_spaces(output)
+      output = endash_spaces(output)
+      output = fix_double_quotes(output, "„", "“")
+      output = fix_single_quotes(output, "‚", "‘")
+      output = fix_multiplication_sign(output)
+      output = fix_space_between_numbers(output)
+      output = fix_units(output)
+      output = fix_trailing_spaces(output)
+      output = fix_widows(output)
+      output = fix_long_czech_numbers(output)
+      output = fix_czech_one_character_words(output)
+      output = fix_czech_abbreviations(output)
+    end
+    # Adds non-breaking space after Czech one character prepostion.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with non-breaking spaces after prepositions.
+    def fix_czech_one_character_words(input)
+      input.gsub(/(\s+|^|\A)(([aikosuvz]\s+)+)/i) { |prep| $1 + $2.gsub(/\s+/, " ") }
+    end
+    # Divides long numbers into parts of three digits using thin space.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with spaces inside of long numbers.
+    def fix_long_czech_numbers(input)
+      input.gsub(/\d+/) { |n| n.reverse.scan(/(.{1,3})/).join(' ').reverse }
+    end
+    # Adds non-breaking spaces in and after Czech abbreviations.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with non-breaking spaces in and after abbreviations.
+    def fix_czech_abbreviations(input)
+      abbreviations = /(a. s.|abl. |absol. |adj. |adm. |adv. |aj.|ak. |ak. sl.|akt. |alch. |amer. |anat. |angl. |anglosas. |ap.|apod.|arab. |arch. |archit. |arg. |arm. gen. |astr. |astrol. |atd.|atp.|att. |b. k.|Bc. |BcA. |belg. |bibl. |biol. |bl. |boh. |bot. |br. |brig. gen. |brit. |bulh. |bás. |býv. |chcsl. |chem. |chil. |CSc. |csl. |círk. |dat. |dep. |des. |dial. |DiS.|dl. |doc. |dol. |dop. |dopr. |dosl. |dán. |dór. |děj. |dět. |ekon. |epic. |etnonym. |eufem. |ev. |event. |f. |fam. |fem. |fil. |film. |fin. |form. |fot. |fr. |fut. |fyz. |gen. |genmjr. |genplk. |genpor. |geogr. |geol. |geom. |germ. |gram. |hebr. |herald. |hist. |hl. |hod. |hor. |horn. |hovor. |hud. |hut. |ie. |imp. |impf. |ind. |indoevr. |inf. |Ing. |instr. |interj. |iron. |it. |ión. |j. č.|jap. |JUDr. |k. s.|kanad. |katalán. |klas. |kniž. |komp. |konj. |konkr. |kpt. |kr. |kuch. |kř. |lat. |les. |lid. |lit. |liturg. |log. |lok. |lék. |m. |mat. |meteor. |metr. |MgA. |Mgr. |mil. |mj. |mjr. |ml. |mld. |mn. č.|mod. |ms. |MUDr. |MVDr. |mysl. |n. |n. l.|např. |neklas. |nesklon. |než. |niz. |nom. |nor. |npor. |nprap. |nrtm. |nstržm. |náb. |nám. |námoř. |něm. |o. p. s.|o. s.|ob. |obch. |obyč. |odd. |odp. |ojed. |opt. |p. |p. n. l.|p. o.|P. S. |P. T. |part. |pas. |pejor. |pers. |pf. |PharmDr. |PhDr. |pl. |plk. |plpf. |po Kr.|pol. |pomn. |popř. |por. |pplk. |ppor. |pprap. |prap. |prep. |prof. |práv. |př. Kr.|př. n. l.|před n. l.|předl. |přivl. |r. |rak. |rcsl. |refl. |reg. |resp. |rkp. |RNDr. |roč. |RSDr. |rtm. |rtn. |rum. |rus. |s. |s. p.|s. r. o.|samohl. |Sb. |sg. |sl. |slang. |slov. |souhl. |spec. |spol. s r. o.|sport. |srov. |st. |stfr. |stol. |str. |stržm. |stsl. |střv. |subj. |subst. |superl. |sv. |svob. |sz. |t. r.|tech. |telev. |teol. |ThDr. |tis. |tj. |trans. |tur. |typogr. |tzn. |tzv. |táz. |v z.|v. o. s.|v. r.|v. v. i.|var. |vedl. |verb. |vl. jm. |voj. |vok. |vulg. |vztaž. |výtv. |vč. |vůb. |z. s.|zahr. |zast. |zejm. |zeměd. |zkr. |zn. |zvl. |zájm. |zř. |č. |č. j.|č. p. |čas. |čes. |čet. |čj. 
|čp. |čín. |čís. |ř. |řec. |říj. |škpt. |špan. |šprap. |št. prap. |švýc. )/i
+      input.gsub(abbreviations) { |abbr| abbr.gsub(/ /, ' ') }
+    end
+  end
+end

data/lib/truty/general.rb ADDED Viewed

@@ -0,0 +1,184 @@
+module Truty
+  # Module with general typography fixes for all the languages. The fixes in here should not be language specific.
+  # @author Matěj Kašpar Jirásek
+  module General
+    # Improves the typography of the large plain text with paragraphs. Adds non-breaking spaces, hyphenation, fixes dashes, etc.
+    #
+    # @param input [String] The text which will be converted.
+    # @param lang [String] Sets the language of hyphenation. (See {#add_soft_hyphens}.)
+    # @return [String] Text with improved typography.
+    def fix(input, lang = "en_us")
+      input.split("\n").collect { |p| fix_paragraph(p, lang) }.join("\n")
+    end
+    # Improves the typography of single paragraph. If you supply more paragraphs you might lose some improvements like widows. For improving longer text see {#fix}.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @param lang [String] Sets the language of hyphenation. (See {#add_soft_hyphens}.)
+    # @return [String] Paragraph with improved typography.
+    def fix_paragraph(input, lang = "en_us")
+      output = input
+      output = ellipsis(output)
+      output = fix_multicharacters(output)
+      output = fix_punctuation_whitespace(output)
+      output = fix_brackets_whitespace(output)
+      output = add_soft_hyphens(output, lang)
+      output = emdash_spaces(output)
+      output = endash_spaces(output)
+      output = fix_double_quotes(output, "„", "“")
+      output = fix_single_quotes(output, "‚", "‘")
+      output = fix_multiplication_sign(output)
+      output = fix_space_between_numbers(output)
+      output = fix_units(output)
+      output = fix_trailing_spaces(output)
+      output = fix_widows(output)
+    end
+    # Converts three or more periods (dots, points) into ellipsis.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with ellipses.
+    def ellipsis(input)
+      input.gsub(/\.{3,}/, "…")
+    end
+    # Adds thin spaces to emdash from both sides. Also converts two or three hyphens to emdash.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with corrected emdashes.
+    def emdash_spaces(input)
+      input.gsub(/\s+(—|-{2,3})\s+/, " — ")
+    end
+    # Adds non-breaking space before endash.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with corrected endashes.
+    def endash_spaces(input)
+      input.gsub(/\s+(–|-)\s+/, " – ")
+    end
+    # Adds soft hyphens to the input.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @param lang [String] Sets the language of hyphenation. One of the languages a {http://www.rubydoc.info/gems/text-hyphen/ text-hyphen gem} can use.
+    # @param left [Integer] Number of characters on the beginning of the words which cannnot be hyphenated.
+    # @param right [Integer] Number of characters on the beginning of the words which cannnot be hyphenated.
+    # @param char [Integer] The character which will be added to hyphenation places.
+    # @return [String] Paragraph with added hyphenation characters.
+    def add_soft_hyphens(input, lang = "en_us", left = 2, right = 2, char = "")
+      l = Text::Hyphen.new(:language => lang, :left => left, :right => right)
+      words = input.split(/[ ]+/m)
+      result = []
+      words.each_with_index do |w, n|
+        if !(w.length < 6 || n == words.size - 1 || w =~ URI::regexp || w =~ /\A[\w+\-.]+@[a-z\d\-]+(\.[a-z]+)*\.[a-z]+\z/i)
+          w = l.visualise(w, char)
+        end
+        result << w
+      end
+      result.join(" ")
+    end
+    # Converts simple double quotes to the typograhic ones.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @param start_quotes [String] The character used for starting quotes.
+    # @param end_quotes [String] The character used for ending quotes.
+    # @return [String] Paragraph with correct double quotes.
+    def fix_double_quotes(input, start_quotes = "“", end_quotes = "”")
+      input.gsub(/"[^"]*"/) { |s| start_quotes + s[1..-2].strip + end_quotes }
+    end
+    # Converts simple single quotes to the typograhic ones.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @param start_quotes [String] The character used for starting quotes.
+    # @param end_quotes [String] The character used for ending quotes.
+    # @return [String] Paragraph with correct single quotes.
+    def fix_single_quotes(input, start_quotes = "‘", end_quotes = "’")
+      input.gsub(/'[^']*'/) { |s| start_quotes + s[1..-2].strip + end_quotes }
+    end
+    # Adds multiplication sign between numbers instead of X.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with correct multiplication signs.
+    def fix_multiplication_sign(input)
+      output = input.gsub(/(\d+)\s{0,1}[Xx]\s{0,1}(\d+)/, '\1 × \2')
+      output = output.gsub(/(\d+)[Xx]/, '\1×')
+    end
+    # Adds thin non-breaking space between numbers.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with correct spaces between numbers.
+    def fix_space_between_numbers(input)
+      input.gsub(/(\d)\s+(\d)/, '\1 \2')
+    end
+    # Fixes spaces around various brackets.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with correct spaces around brackets.
+    def fix_brackets_whitespace(input)
+      output = input.gsub(/([\(\[\{])\s*/, '\1')
+      output = output.gsub(/\s*([\]\)\}])/, '\1')
+      output = output.gsub(/\s+([\(\[\{])\s*/, ' \1')
+      output = output.gsub(/\s*([\]\)\}])\s+/, '\1 ')
+    end
+    # Tries to substitute more characters which should be one, like "©", "™", etc.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with converted characters.
+    def fix_multicharacters(input)
+      output = input.gsub(/\([Cc]\)/, "©")
+      output = output.gsub(/\([Pp]\)/, "℗")
+      output = output.gsub(/\([Rr]\)/, "®")
+      output = output.gsub(/\((SM|sm|Sm)\)/, "℠")
+      output = output.gsub(/\((TM|tm|Tm)\)/, "™")
+      output = output.gsub(/\+-/, "±")
+      output = output.gsub(/-\+/, "∓")
+      output = output.gsub(/No.?\s*(\d+)/i, '№\1')
+      output = output.gsub(/°C/, '℃')
+      output = output.gsub(/°F/, '℉')
+    end
+    # Fixes spaces around punctuation.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with correct spaces around punctuation.
+    def fix_punctuation_whitespace(input)
+      input.gsub(/\s*([\!\?\.,;:…]+)\s*/, '\1 ')
+    end
+    # Fixes non-breaking spaces betwwen number and unit.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with correct spaces between number and unit.
+    def fix_units(input)
+      output = input.gsub(/(\d+)\s+(%|‰|‱|℃|℉|°|€|Kč|(Y|Z|E|P|T|G|M|k|h|da|d|m|µ|n|p|f|a|z|y)?(m(²|³)?|g|s|h|A|K|cd|mol|Ω|℃|℉))/, '\1 \2')
+      output.gsub(/(\*|§|#|†)\s+(\d+)/, '\1 \2')
+    end
+    # Adds non-breaking space before the last word in the paragraph.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph with removed widows.
+    def fix_widows(input)
+      input.gsub(/(\s)(\S+(\$|\z))/, ' \2')
+    end
+    # Removes whitespace after the end of the paragraph.
+    #
+    # @param input [String] The paragraph which will be converted.
+    # @return [String] Paragraph without trailing spaces.
+    def fix_trailing_spaces(input)
+      input.gsub(/\s*($|\z)/, '')
+    end
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,93 @@
+--- !ruby/object:Gem::Specification
+name: truty
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Matěj Kašpar Jirásek
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2015-01-04 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: text-hyphen
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.4'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.4'
+- !ruby/object:Gem::Dependency
+  name: simplecov
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.9'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.9'
+- !ruby/object:Gem::Dependency
+  name: yard
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.8'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.8'
+description: A string converter which aims to correct the typography.
+email: matej.jirasek@me.com
+executables:
+- truty
+extensions: []
+extra_rdoc_files: []
+files:
+- LICENSE
+- README.md
+- bin/truty
+- lib/truty.rb
+- lib/truty/czech.rb
+- lib/truty/general.rb
+homepage: https://github.com/mkj-is/Truty
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.4.3
+signing_key:
+specification_version: 4
+summary: True typography converter
+test_files: []
+has_rdoc: