RubyGems - jtag - Versions diffs - 0.1.5 - Mend

jtag 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

data/README.rdoc +8 -0
data/bin/jtag +428 -0
data/jtag.rdoc +159 -0
data/lib/jtag.rb +13 -0
data/lib/jtag/config_files/blacklist.txt +0 -0
data/lib/jtag/config_files/config.yml +2 -0
data/lib/jtag/config_files/stopwords.txt +782 -0
data/lib/jtag/config_files/synonyms.yml +18 -0
data/lib/jtag/jekylltag.rb +179 -0
data/lib/jtag/porter_stemming.rb +171 -0
data/lib/jtag/string.rb +47 -0
data/lib/jtag/version.rb +3 -0
metadata +145 -0

data/lib/jtag/config_files/synonyms.yml ADDED

@@ -0,0 +1,18 @@
+---
+tagging: # each key is a tag; JTag#synonyms maps every phrase listed under it to that tag
+- tags
+- tags.app
+webdesign:
+- web design
+- web development
+- javascript
+- jquery
+- safari
+- chrome
+- firefox
+mountainlion:
+- Mountain Lion
+- OS X 10.8
+- "10.8" # quoted so YAML loads a string rather than a float
+lion:
+- "10.7" # quoted so YAML loads a string rather than a float

data/lib/jtag/jekylltag.rb ADDED

@@ -0,0 +1,179 @@
+class JTag
+  def initialize(support_dir, config)
+    @support = support_dir
+    @min_matches = config["min_matches"] || 2
+    @tags_loc = config["tags_location"]
+    @blacklistfile = File.join(@support,"blacklist.txt")
+    @blacklist = IO.read(@blacklistfile).split("\n") || []
+    @skipwords = IO.read(File.join(support_dir,"stopwords.txt")).split("\n") || []
+    remote_tags = get_tags
+    @tags = {}
+    remote_tags.each {|tag| @tags[Text::PorterStemming.stem(tag).downcase] = tag if tag}
+    synonyms.each { |k,v|
+      @tags[k.to_s.downcase] = v unless @blacklist.include?(k.to_s.downcase)
+    }
+  end
+  def get_tags(options={})
+    blacklisted = options[:blacklisted] || false
+    counts = options[:counts] || false
+    host, path = @tags_loc.match(/^([^\/]+)(\/.*)/)[1,2]
+    tags = ""
+    http = Net::HTTP.new(host, 80)
+    http.start do |http|
+      request = Net::HTTP::Get.new(path)
+      response = http.request(request)
+      response.value
+      tags = response.body
+    end
+    tags = JSON.parse(tags)
+    if tags && tags.key?("tags")
+      if counts
+        return tags["tags_count"]
+      else
+        unless blacklisted
+          tags["tags"].delete_if {|tag| !tag || @blacklist.include?(tag.downcase) }
+        end
+        return tags["tags"]
+      end
+    else
+      return false
+    end
+  end
+  def synonyms
+    if File.exists?(File.join(@support,"synonyms.yml"))
+      syn = YAML::load(File.open(File.join(@support,"synonyms.yml")))
+      compiled = {}
+      syn.each {|k,v|
+        v.each {|synonym|
+          compiled[synonym] = k
+        }
+      }
+    else
+      return false
+    end
+    compiled
+  end
+  def split_post(file)
+    input = IO.read(file)
+    # Check to see if it's a full post with YAML headers
+    post_parts = input.split(/^---\s*$/)
+    raise "File has improper YAML header" unless post_parts.length == 3
+    after = post_parts[2].strip
+    yaml = YAML::load(input)
+    [yaml, after]
+  end
+  def post_tags(file)
+    if File.exists?(file)
+      input = IO.read(file)
+      yaml = YAML::load(input) || false
+      exit_now! "Invalid post header" unless yaml
+      return yaml["tags"] || []
+    else
+      raise "File #{file} does not exist"
+    end
+  end
+  def merge_tags(tags, merged, file)
+    current_tags = post_tags(file)
+    post_has_tag = false
+    tags.each {|tag|
+      if current_tags.include?(tag)
+        current_tags.delete(tag)
+        post_has_tag = true
+      end
+    }
+    return false unless post_has_tag
+    current_tags.push(merged)
+    current_tags.uniq!
+    current_tags.sort
+  end
+  def suggest(input)
+    yaml = YAML::load(input) || false
+    exit_now! "Invalid post header" unless yaml
+    current_tags = yaml["tags"] || []
+    title = yaml["title"] || ""
+    @content = (title + after).strip_all.strip_urls rescue input.strip_all.strip_urls
+    @words = split_words
+    @auto_tags = []
+    populate_auto_tags
+    @auto_tags.concat(current_tags).uniq!
+  end
+  def split_words
+    @content.gsub(/([\/\\]|\s+)/,' ').gsub(/[^A-Za-z0-9\s-]/,'').split(" ").delete_if { |word|
+      word =~ /^[^a-z]+$/ || word.length < 4
+    }.map! { |word|
+      Text::PorterStemming.stem(word).downcase
+    }.delete_if{ |word|
+      @skipwords.include?(word) && !@tags.keys.include?(word)
+    }
+  end
+  def populate_auto_tags
+    freqs = Hash.new(0)
+    @words.each { |word| freqs[word] += 1 }
+    freqs.delete_if {|k,v| v < @min_matches }
+    exit_with_message "No high frequency words", 1 if freqs.empty?
+    freqs.sort_by {|k,v| [v * -1, k] }.each {|word|
+      index = @tags.keys.index(word[0])
+      unless index.nil? || @blacklist.include?(@tags.keys[index])
+        @auto_tags.push(@tags[@tags.keys[index]]) unless index.nil?
+      end
+    }
+    @tags.each{|k,v|
+      occurrences = @content.scan(/\b#{k}\b/i)
+      if occurrences.count >= @min_matches
+        @auto_tags.push(v)
+      end
+    }
+  end
+  def blacklist(tags)
+    tags.each {|word|
+      @blacklist.push(word.downcase)
+    }
+    File.open(@blacklistfile,'w+') do |f|
+      f.puts @blacklist.uniq.sort.join("\n")
+    end
+  end
+  def unblacklist(tags)
+    tags.each {|word|
+      @blacklist.delete_if { |x| x == word }
+    }
+    File.open(@blacklistfile,'w+') do |f|
+      f.puts @blacklist.uniq.sort.join("\n")
+    end
+  end
+  def update_file_tags(file, tags)
+    begin
+      if File.exists?(file)
+        yaml, after = split_post(file)
+        yaml["tags"] = tags
+        File.open(file,'w+') do |f|
+          f.puts yaml.to_yaml
+          f.puts "---"
+          f.puts after
+        end
+      else
+        raise "File does not exist: #{file}"
+      end
+      return true
+    rescue Exception => e
+      raise e
+      return false
+    end
+  end
+end

data/lib/jtag/porter_stemming.rb ADDED

@@ -0,0 +1,171 @@
+#
+# This is the Porter Stemming algorithm, ported to Ruby from the
+# version coded up in Perl.  It's easy to follow against the rules
+# in the original paper:
+#
+#   Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
+#   no. 3, pp 130-137.
+#
+# Taken from http://www.tartarus.org/~martin/PorterStemmer (Public Domain)
+#
+module Text # :nodoc:
+module PorterStemming # the only method defined here is PorterStemming.stem
+  STEP_2_LIST = { # Step 2: matched suffix => replacement
+    'ational' => 'ate', 'tional' => 'tion', 'enci' => 'ence', 'anci' => 'ance',
+    'izer' => 'ize', 'bli' => 'ble',
+    'alli' => 'al', 'entli' => 'ent', 'eli' => 'e', 'ousli' => 'ous',
+    'ization' => 'ize', 'ation' => 'ate',
+    'ator' => 'ate', 'alism' => 'al', 'iveness' => 'ive', 'fulness' => 'ful',
+    'ousness' => 'ous', 'aliti' => 'al',
+    'iviti' => 'ive', 'biliti' => 'ble', 'logi' => 'log'
+  }
+  STEP_3_LIST = { # Step 3: matched suffix => replacement ('' removes the suffix)
+    'icate' => 'ic', 'ative' => '', 'alize' => 'al', 'iciti' => 'ic',
+    'ical' => 'ic', 'ful' => '', 'ness' => ''
+  }
+  SUFFIX_1_REGEXP = /(
+                    ational  |
+                    tional   |
+                    enci     |
+                    anci     |
+                    izer     |
+                    bli      |
+                    alli     |
+                    entli    |
+                    eli      |
+                    ousli    |
+                    ization  |
+                    ation    |
+                    ator     |
+                    alism    |
+                    iveness  |
+                    fulness  |
+                    ousness  |
+                    aliti    |
+                    iviti    |
+                    biliti   |
+                    logi)$/x
+  SUFFIX_2_REGEXP = /(
+                      al       |
+                      ance     |
+                      ence     |
+                      er       |
+                      ic       |
+                      able     |
+                      ible     |
+                      ant      |
+                      ement    |
+                      ment     |
+                      ent      |
+                      ou       |
+                      ism      |
+                      ate      |
+                      iti      |
+                      ous      |
+                      ive      |
+                      ize)$/x
+  C = "[^aeiou]"             # consonant
+  V = "[aeiouy]"             # vowel
+  CC = "#{C}(?>[^aeiouy]*)"  # consonant sequence
+  VV = "#{V}(?>[aeiou]*)"    # vowel sequence
+  MGR0 = /^(#{CC})?#{VV}#{CC}/o                # [cc]vvcc... is m>0
+  MEQ1 = /^(#{CC})?#{VV}#{CC}(#{VV})?$/o       # [cc]vvcc[vv] is m=1
+  MGR1 = /^(#{CC})?#{VV}#{CC}#{VV}#{CC}/o      # [cc]vvccvvcc... is m>1
+  VOWEL_IN_STEM   = /^(#{CC})?#{V}/o           # vowel in stem
+  def self.stem(word) # returns the stem as a new string; all suffix patterns below are lowercase and case-sensitive
+    # make a copy of the given object and convert it to a string, so the caller's object is never modified.
+    word = word.dup.to_str
+    return word if word.length < 3 # words shorter than three characters are returned unstemmed
+    # now map initial y to Y so that the patterns never treat it as vowel
+    word[0] = 'Y' if word[0] == ?y
+    # Step 1a: (ss|i)es -> ss|i; otherwise drop a final s that follows a non-s character
+    if word =~ /(ss|i)es$/
+      word = $` + $1
+    elsif word =~ /([^s])s$/
+      word = $` + $1
+    end
+    # Step 1b: eed -> ee when the stem matches MGR0; otherwise drop ed/ing when the stem has a vowel, then repair the ending
+    if word =~ /eed$/
+      word.chop! if $` =~ MGR0
+    elsif word =~ /(ed|ing)$/
+      stem = $`
+      if stem =~ VOWEL_IN_STEM
+        word = stem
+        case word
+          when /(at|bl|iz)$/             then word << "e"
+          when /([^aeiouylsz])\1$/       then word.chop!
+          when /^#{CC}#{V}[^aeiouwxy]$/o then word << "e"
+        end
+      end
+    end
+    if word =~ /y$/ # final y -> i when the rest of the word contains a vowel
+      stem = $`
+      word = stem + "i" if stem =~ VOWEL_IN_STEM
+    end
+    # Step 2: replace a SUFFIX_1_REGEXP suffix via STEP_2_LIST when the stem matches MGR0
+    if word =~ SUFFIX_1_REGEXP
+      stem = $`
+      suffix = $1
+      # print "stem= " + stem + "\n" + "suffix=" + suffix + "\n"
+      if stem =~ MGR0
+        word = stem + STEP_2_LIST[suffix]
+      end
+    end
+    # Step 3: replace the matched suffix via STEP_3_LIST when the stem matches MGR0
+    if word =~ /(icate|ative|alize|iciti|ical|ful|ness)$/
+      stem = $`
+      suffix = $1
+      if stem =~ MGR0
+        word = stem + STEP_3_LIST[suffix]
+      end
+    end
+    # Step 4: drop a SUFFIX_2_REGEXP suffix, or the "ion" of sion/tion, when the remaining stem matches MGR1
+    if word =~ SUFFIX_2_REGEXP
+      stem = $`
+      if stem =~ MGR1
+        word = stem
+      end
+    elsif word =~ /(s|t)(ion)$/
+      stem = $` + $1
+      if stem =~ MGR1
+        word = stem
+      end
+    end
+    # Step 5: drop a final e when the stem matches MGR1, or matches MEQ1 without the CC-V-consonant shape; then ll -> l when the word matches MGR1
+    if word =~ /e$/
+      stem = $`
+      if (stem =~ MGR1) ||
+          (stem =~ MEQ1 && stem !~ /^#{CC}#{V}[^aeiouwxy]$/o)
+        word = stem
+      end
+    end
+    if word =~ /ll$/ && word =~ MGR1
+      word.chop!
+    end
+    # and turn initial Y back to y
+    word[0] = 'y' if word[0] == ?Y
+    word
+  end
+end
+end

data/lib/jtag/string.rb ADDED

@@ -0,0 +1,47 @@
+class String
+  # convert "WikiLink" to "Wiki link"
+  def break_camel
+    return downcase if match(/\A[A-Z]+\z/)
+    gsub(/([A-Z]+)([A-Z][a-z])/, '\1 \2').
+    gsub(/([a-z])([A-Z])/, '\1 \2').
+    downcase
+  end
+  def strip_markdown
+    # strip all Markdown and Liquid tags
+    gsub(/\{%.*?%\}/,'').
+    gsub(/\[\^.+?\](\: .*?$)?/,'').
+    gsub(/\s{0,2}\[.*?\]: .*?$/,'').
+    gsub(/\!\[.*?\][\[\(].*?[\]\)]/,"").
+    gsub(/\[(.*?)\][\[\(].*?[\]\)]/,"\\1").
+    gsub(/^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$/,'').
+    gsub(/^\#{1,6}\s*/,'').
+    gsub(/(\*{1,2})(\S.*?\S)\1/,"\\2").
+    gsub(/(`{3,})(.*?)\1/m,"\\2").
+    gsub(/^-{3,}\s*$/,"").
+    gsub(/`(.+)`/,"\\1").
+    gsub(/\n{2,}/,"\n\n")
+  end
+  def strip_tags
+    return CGI.unescapeHTML(
+        gsub(/<(script|style|pre|code|figure).*?>.*?<\/\1>/im, '').
+        gsub(/<!--.*?-->/m, '').
+        gsub(/<(img|hr|br).*?>/i, " ").
+        gsub(/<(dd|a|h\d|p|small|b|i|blockquote|li)( [^>]*?)?>(.*?)<\/\1>/i, " \\3 ").
+        gsub(/<\/?(dt|a|ul|ol)( [^>]+)?>/i, " ").
+        gsub(/<[^>]+?>/, '').
+        gsub(/\[\d+\]/, '').
+        gsub(/&#8217;/,"'").gsub(/&.*?;/,' ').gsub(/;/,' ')
+    ).lstrip.gsub("\xE2\x80\x98","'").gsub("\xE2\x80\x99","'").gsub("\xCA\xBC","'").gsub("\xE2\x80\x9C",'"').gsub("\xE2\x80\x9D",'"').gsub("\xCB\xAE",'"').squeeze(" ")
+  end
+  def strip_urls
+    gsub(/(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?/i,"")
+  end
+  def strip_all
+    strip_tags.strip_markdown.strip
+  end
+end

data/lib/jtag/version.rb ADDED

@@ -0,0 +1,3 @@
+module Jtag
+  VERSION = '0.1.5' # version string of this jtag release
+end

metadata ADDED

@@ -0,0 +1,145 @@
+--- !ruby/object:Gem::Specification
+name: jtag
+version: !ruby/object:Gem::Version
+  version: 0.1.5
+  prerelease:
+platform: ruby
+authors:
+- Brett Terpstra
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2013-08-26 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: rdoc
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: aruba
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: gli
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - '='
+      - !ruby/object:Gem::Version
+        version: 2.7.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - '='
+      - !ruby/object:Gem::Version
+        version: 2.7.0
+- !ruby/object:Gem::Dependency
+  name: json
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+description:
+email: me@brettterpstra.com
+executables:
+- jtag
+extensions: []
+extra_rdoc_files:
+- README.rdoc
+- jtag.rdoc
+files:
+- bin/jtag
+- lib/jtag/version.rb
+- lib/jtag/config_files/blacklist.txt
+- lib/jtag/config_files/config.yml
+- lib/jtag/config_files/stopwords.txt
+- lib/jtag/config_files/synonyms.yml
+- lib/jtag/porter_stemming.rb
+- lib/jtag/jekylltag.rb
+- lib/jtag/string.rb
+- lib/jtag.rb
+- README.rdoc
+- jtag.rdoc
+homepage: http://brettterpstra.com
+licenses: []
+post_install_message:
+rdoc_options:
+- --title
+- jtag
+- --main
+- README.rdoc
+- -ri
+require_paths:
+- lib
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 1.8.25
+signing_key:
+specification_version: 3
+summary: Auto-tagging and tagging tools for Jekyll
+test_files: []