jtag 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ ---
2
+ tagging:
3
+ - tags
4
+ - tags.app
5
+ webdesign:
6
+ - web design
7
+ - web development
8
+ - javascript
9
+ - jquery
10
+ - safari
11
+ - chrome
12
+ - firefox
13
+ mountainlion:
14
+ - Mountain Lion
15
+ - OS X 10.8
16
+ - "10.8"
17
+ lion:
18
+ - "10.7"
@@ -0,0 +1,179 @@
1
# Tag suggestion and tag maintenance helpers for Jekyll-style posts
# (files that start with a YAML front matter block delimited by "---" lines).
#
# An instance keeps:
# * @tags      - lookup of lower-cased key => canonical tag, built from the
#                remote tag list and the synonyms file
# * @blacklist - lower-cased words that must never be suggested
# * @skipwords - stop words that are ignored while counting word frequencies
class JTag

  # Loads the word lists from +support_dir+ and builds the tag lookup table.
  # Building the table performs an HTTP request (see #get_tags).
  #
  # @param support_dir [String] directory holding blacklist.txt, stopwords.txt
  #   and (optionally) synonyms.yml
  # @param config [Hash] reads "min_matches" (defaults to 2) and "tags_location"
  def initialize(support_dir, config)
    @support = support_dir
    @min_matches = config["min_matches"] || 2
    @tags_loc = config["tags_location"]
    @blacklistfile = File.join(@support, "blacklist.txt")
    @blacklist = read_word_list(@blacklistfile)
    @skipwords = read_word_list(File.join(support_dir, "stopwords.txt"))
    @tags = {}
    # get_tags returns false when the response has no "tags" key.
    (get_tags || []).each do |tag|
      @tags[Text::PorterStemming.stem(tag.to_s).downcase] = tag if tag
    end
    # synonyms returns false when there is no synonyms file.
    (synonyms || {}).each do |synonym, tag|
      key = synonym.to_s.downcase
      @tags[key] = tag unless @blacklist.include?(key)
    end
  end

  # Fetches the JSON tag index from the configured "tags_location"
  # ("host/path", optionally prefixed with http:// or https://).
  #
  # @param options [Hash] :blacklisted => true keeps blacklisted (and nil)
  #   entries in the list; :counts => true returns the "tags_count" entry
  #   instead of the tag list
  # @return [Array, Object, false] the "tags" list, the "tags_count" value, or
  #   false when the response has no "tags" key
  # @raise [ArgumentError] when the location cannot be split into host and path
  # @raise [Net::HTTPExceptions] when the server answers with a non-2xx status
  def get_tags(options={})
    blacklisted = options[:blacklisted] || false
    counts = options[:counts] || false

    location = @tags_loc.to_s
    use_ssl = !(location =~ /\Ahttps:\/\//i).nil?
    parts = location.sub(/\Ahttps?:\/\//i, "").match(/\A([^\/]+)(\/.*)/)
    raise ArgumentError, "Invalid tags_location: #{@tags_loc.inspect}" unless parts
    host, path = parts[1], parts[2]

    response = Net::HTTP.start(host, use_ssl ? 443 : 80, :use_ssl => use_ssl) do |http|
      http.request(Net::HTTP::Get.new(path))
    end
    response.value # raises unless the status is 2xx
    data = JSON.parse(response.body)

    return false unless data.is_a?(Hash) && data.key?("tags")
    return data["tags_count"] if counts

    tags = data["tags"]
    unless blacklisted
      tags = tags.reject { |tag| !tag || @blacklist.include?(tag.to_s.downcase) }
    end
    tags
  end

  # Reads synonyms.yml from the support directory and inverts it.
  # The file maps a tag to a list of synonyms; the result maps every synonym
  # back to its tag.
  #
  # @return [Hash, false] synonym => tag, or false when the file does not exist
  def synonyms
    path = File.join(@support, "synonyms.yml")
    return false unless File.exist?(path)

    # File.read closes the handle; the file is the user's own configuration.
    syn = load_yaml(File.read(path))
    compiled = {}
    return compiled unless syn.is_a?(Hash)

    syn.each do |tag, list|
      Array(list).each { |synonym| compiled[synonym] = tag }
    end
    compiled
  end

  # Splits a post into its parsed front matter and its body.
  #
  # Only the first two "---" lines are treated as delimiters, so "---" rules
  # inside the body are preserved and a post with an empty body is accepted.
  #
  # @param file [String] path of the post
  # @return [Array] [parsed front matter, stripped body text]
  # @raise [RuntimeError] when the file does not contain two "---" delimiters
  def split_post(file)
    input = File.read(file)
    post_parts = input.split(/^---\s*$/, 3)
    raise "File has improper YAML header" unless post_parts.length == 3
    after = post_parts[2].strip
    yaml = load_yaml(input)
    [yaml, after]
  end

  # Returns the "tags" entry of a post's front matter.
  #
  # @param file [String] path of the post
  # @return [Array] the tags, or [] when the header has no "tags" key
  # @raise [RuntimeError] when the file does not exist
  def post_tags(file)
    raise "File #{file} does not exist" unless File.exist?(file)

    yaml = load_yaml(File.read(file)) || false
    # exit_now! is not defined in this class; it has to be supplied by the
    # surrounding application.
    exit_now! "Invalid post header" unless yaml
    yaml["tags"] || []
  end

  # Replaces every tag of +tags+ found in the post with the single tag +merged+.
  # The file itself is not modified.
  #
  # @param tags [Array] tags to replace
  # @param merged [Object] replacement tag
  # @param file [String] path of the post
  # @return [Array, false] the new sorted, de-duplicated tag list, or false
  #   when the post has none of +tags+
  def merge_tags(tags, merged, file)
    current_tags = post_tags(file)
    remaining = current_tags.reject { |tag| tags.include?(tag) }
    return false if remaining.length == current_tags.length

    remaining.push(merged)
    remaining.uniq.sort
  end

  # Suggests tags for a post given as a string.
  #
  # The text that is analysed is the post title plus the body that follows the
  # front matter; input without a front matter block is analysed as a whole.
  #
  # @param input [String] full post text
  # @return [Array] suggested tags followed by the tags the post already has,
  #   without duplicates
  def suggest(input)
    yaml = load_yaml(input) || false
    exit_now! "Invalid post header" unless yaml
    header = yaml.is_a?(Hash) ? yaml : {}
    current_tags = Array(header["tags"])
    title = (header["title"] || "").to_s

    parts = input.split(/^---\s*$/, 3)
    text = parts.length == 3 ? "#{title}\n#{parts[2]}" : input
    @content = text.strip_all.strip_urls
    @words = split_words
    @auto_tags = []
    populate_auto_tags
    # uniq (not uniq!) so that an Array is returned even without duplicates.
    @auto_tags.concat(current_tags).uniq
  end

  # Breaks @content into stemmed, lower-cased words.
  # Words shorter than four characters, words without any lowercase letter and
  # stop words (unless they are tag keys) are dropped.
  #
  # @return [Array<String>]
  def split_words
    candidates = @content.gsub(/([\/\\]|\s+)/, ' ').gsub(/[^A-Za-z0-9\s-]/, '').split(" ")
    candidates.reject! { |word| word =~ /^[^a-z]+$/ || word.length < 4 }
    candidates.map! { |word| Text::PorterStemming.stem(word).downcase }
    candidates.reject { |word| @skipwords.include?(word) && !@tags.key?(word) }
  end

  # Fills @auto_tags from @words and @content:
  # 1. stemmed words that occur at least @min_matches times and are tag keys,
  #    most frequent first;
  # 2. tag keys (including multi-word synonyms) that literally occur in the
  #    content at least @min_matches times, case-insensitively.
  # Duplicates are possible; #suggest removes them.
  def populate_auto_tags
    freqs = Hash.new(0)
    @words.each { |word| freqs[word] += 1 }
    freqs.delete_if { |_word, count| count < @min_matches }

    # exit_with_message is not defined in this class; it has to be supplied by
    # the surrounding application.
    exit_with_message "No high frequency words", 1 if freqs.empty?

    freqs.sort_by { |word, count| [-count, word] }.each do |word, _count|
      next unless @tags.key?(word)
      next if @blacklist.include?(word)
      @auto_tags.push(@tags[word])
    end

    @tags.each do |key, tag|
      # Keys are plain text, not patterns: escape them. The lookarounds behave
      # like \b for ordinary words and also work for keys such as "c++".
      pattern = /(?<!\w)#{Regexp.escape(key)}(?!\w)/i
      @auto_tags.push(tag) if @content.scan(pattern).count >= @min_matches
    end
  end

  # Adds words (lower-cased) to the blacklist and rewrites the blacklist file.
  #
  # @param tags [Array<String>]
  def blacklist(tags)
    @blacklist |= tags.map { |word| word.downcase }
    write_blacklist
  end

  # Removes words from the blacklist and rewrites the blacklist file.
  # The comparison ignores case because entries are stored lower-cased.
  #
  # @param tags [Array<String>]
  def unblacklist(tags)
    unwanted = tags.map { |word| word.downcase }
    @blacklist.delete_if { |entry| unwanted.include?(entry.downcase) }
    write_blacklist
  end

  # Rewrites a post with a new "tags" list, keeping the rest of the front
  # matter and the body.
  #
  # @param file [String] path of the post
  # @param tags [Array] new tag list
  # @return [true]
  # @raise [RuntimeError] when the file does not exist or has no valid header
  def update_file_tags(file, tags)
    raise "File does not exist: #{file}" unless File.exist?(file)

    yaml, after = split_post(file)
    yaml["tags"] = tags
    File.open(file, 'w') do |f|
      f.puts yaml.to_yaml
      f.puts "---"
      f.puts after
    end
    true
  end

  private

  # Reads a newline-separated word list; a missing file counts as empty.
  def read_word_list(path)
    return [] unless File.exist?(path)
    File.read(path).split("\n")
  end

  # Parses YAML the way YAML.load did before Psych 4, so front matter with
  # "date:" values (Date/Time objects) keeps loading on current Rubies.
  # NOTE(review): this is a permissive load; it is only meant for the user's
  # own posts and configuration, never for untrusted input.
  def load_yaml(text)
    if YAML.respond_to?(:unsafe_load)
      YAML.unsafe_load(text)
    else
      YAML.load(text)
    end
  end

  # Writes the sorted, de-duplicated blacklist, one word per line.
  def write_blacklist
    File.open(@blacklistfile, 'w') do |f|
      f.puts @blacklist.uniq.sort.join("\n")
    end
  end
end
179
+
@@ -0,0 +1,171 @@
1
+ #
2
+ # This is the Porter Stemming algorithm, ported to Ruby from the
3
+ # version coded up in Perl. It's easy to follow against the rules
4
+ # in the original paper in:
5
+ #
6
+ # Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
7
+ # no. 3, pp 130-137,
8
+ #
9
+ # Taken from http://www.tartarus.org/~martin/PorterStemmer (Public Domain)
10
+ #
11
module Text # :nodoc:
  # Porter stemming (suffix stripping) for English words.
  #
  # The rules operate on lowercase ASCII; uppercase letters are never treated
  # as vowels and never match a suffix rule.
  module PorterStemming

    # Step 2 suffix => replacement.
    STEP_2_LIST = {
      'ational' => 'ate', 'tional' => 'tion', 'enci' => 'ence', 'anci' => 'ance',
      'izer' => 'ize', 'bli' => 'ble',
      'alli' => 'al', 'entli' => 'ent', 'eli' => 'e', 'ousli' => 'ous',
      'ization' => 'ize', 'ation' => 'ate',
      'ator' => 'ate', 'alism' => 'al', 'iveness' => 'ive', 'fulness' => 'ful',
      'ousness' => 'ous', 'aliti' => 'al',
      'iviti' => 'ive', 'biliti' => 'ble', 'logi' => 'log'
    }.freeze

    # Step 3 suffix => replacement.
    STEP_3_LIST = {
      'icate' => 'ic', 'ative' => '', 'alize' => 'al', 'iciti' => 'ic',
      'ical' => 'ic', 'ful' => '', 'ness' => ''
    }.freeze

    # Suffixes handled by step 2 (the keys of STEP_2_LIST).
    SUFFIX_1_REGEXP = /(
                      ational  |
                      tional   |
                      enci     |
                      anci     |
                      izer     |
                      bli      |
                      alli     |
                      entli    |
                      eli      |
                      ousli    |
                      ization  |
                      ation    |
                      ator     |
                      alism    |
                      iveness  |
                      fulness  |
                      ousness  |
                      aliti    |
                      iviti    |
                      biliti   |
                      logi)$/x

    # Suffixes removed by step 4.
    SUFFIX_2_REGEXP = /(
                        al       |
                        ance     |
                        ence     |
                        er       |
                        ic       |
                        able     |
                        ible     |
                        ant      |
                        ement    |
                        ment     |
                        ent      |
                        ou       |
                        ism      |
                        ate      |
                        iti      |
                        ous      |
                        ive      |
                        ize)$/x

    C = "[^aeiou]".freeze                # consonant
    V = "[aeiouy]".freeze                # vowel
    CC = "#{C}(?>[^aeiouy]*)".freeze     # consonant sequence
    VV = "#{V}(?>[aeiou]*)".freeze       # vowel sequence

    MGR0 = /^(#{CC})?#{VV}#{CC}/o                # [cc]vvcc... is m>0
    MEQ1 = /^(#{CC})?#{VV}#{CC}(#{VV})?$/o       # [cc]vvcc[vv] is m=1
    MGR1 = /^(#{CC})?#{VV}#{CC}#{VV}#{CC}/o      # [cc]vvccvvcc... is m>1
    VOWEL_IN_STEM = /^(#{CC})?#{V}/o             # vowel in stem

    # Returns the stem of +word+. The argument is not modified.
    #
    # @param word [#to_str] the word to stem
    # @return [String] a new string; words shorter than three characters are
    #   returned as an unchanged copy
    def self.stem(word)

      # make a copy of the given object and convert it to a string.
      word = word.dup.to_str

      return word if word.length < 3

      # Map an initial y to Y so that the patterns never treat it as a vowel.
      # Remember that the sentinel was introduced here, so that a capital Y
      # supplied by the caller is not lowercased when the mapping is undone.
      initial_y = word[0] == ?y
      word[0] = 'Y' if initial_y

      # Step 1a: plurals (sses -> ss, ies -> i, trailing s removed)
      if word =~ /(ss|i)es$/
        word = $` + $1
      elsif word =~ /([^s])s$/
        word = $` + $1
      end

      # Step 1b: -eed, -ed, -ing
      if word =~ /eed$/
        word.chop! if $` =~ MGR0
      elsif word =~ /(ed|ing)$/
        stem = $`
        if stem =~ VOWEL_IN_STEM
          word = stem
          case word
          when /(at|bl|iz)$/ then word << "e"
          when /([^aeiouylsz])\1$/ then word.chop!
          when /^#{CC}#{V}[^aeiouwxy]$/o then word << "e"
          end
        end
      end

      # Step 1c: a final y becomes i when the stem contains a vowel
      if word =~ /y$/
        stem = $`
        word = stem + "i" if stem =~ VOWEL_IN_STEM
      end

      # Step 2
      if word =~ SUFFIX_1_REGEXP
        stem = $`
        suffix = $1
        if stem =~ MGR0
          word = stem + STEP_2_LIST[suffix]
        end
      end

      # Step 3
      if word =~ /(icate|ative|alize|iciti|ical|ful|ness)$/
        stem = $`
        suffix = $1
        if stem =~ MGR0
          word = stem + STEP_3_LIST[suffix]
        end
      end

      # Step 4
      if word =~ SUFFIX_2_REGEXP
        stem = $`
        if stem =~ MGR1
          word = stem
        end
      elsif word =~ /(s|t)(ion)$/
        stem = $` + $1
        if stem =~ MGR1
          word = stem
        end
      end

      # Step 5
      if word =~ /e$/
        stem = $`
        if (stem =~ MGR1) ||
            (stem =~ MEQ1 && stem !~ /^#{CC}#{V}[^aeiouwxy]$/o)
          word = stem
        end
      end

      if word =~ /ll$/ && word =~ MGR1
        word.chop!
      end

      # and turn the initial Y back to y, but only if it was mapped above
      word[0] = 'y' if initial_y && word[0] == ?Y

      word
    end

  end
end
@@ -0,0 +1,47 @@
1
# Text clean-up helpers added to String. They are used to reduce a post
# (HTML, Markdown, Liquid) to plain words before tags are suggested.
class String
  # Splits a camel-cased word into lower-cased words:
  # "WikiLink" => "wiki link", "HTMLParser" => "html parser".
  # A word made only of capital letters is just lower-cased.
  def break_camel
    return downcase if match(/\A[A-Z]+\z/)
    gsub(/([A-Z]+)([A-Z][a-z])/, '\1 \2').
    gsub(/([a-z])([A-Z])/, '\1 \2').
    downcase
  end

  # Removes Liquid tags and Markdown markup (footnotes, reference
  # definitions, images, links, headline markers, emphasis, code fences,
  # horizontal rules) and collapses runs of blank lines.
  def strip_markdown
    # strip all Markdown and Liquid tags
    gsub(/\{%.*?%\}/,'').
    gsub(/\[\^.+?\](\: .*?$)?/,'').
    gsub(/\s{0,2}\[.*?\]: .*?$/,'').
    gsub(/\!\[.*?\][\[\(].*?[\]\)]/,"").
    gsub(/\[(.*?)\][\[\(].*?[\]\)]/,"\\1").
    gsub(/^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$/,'').
    gsub(/^\#{1,6}\s*/,'').
    gsub(/(\*{1,2})(\S.*?\S)\1/,"\\2").
    gsub(/(`{3,})(.*?)\1/m,"\\2").
    gsub(/^-{3,}\s*$/,"").
    gsub(/`(.+)`/,"\\1").
    gsub(/\n{2,}/,"\n\n")
  end

  # Removes HTML: script/style/pre/code/figure elements with their content,
  # comments and all remaining tags. Character entities and semicolons become
  # spaces, typographic quotes become ASCII quotes and runs of spaces are
  # squeezed.
  def strip_tags
    without_markup =
      gsub(/<(script|style|pre|code|figure).*?>.*?<\/\1>/im, '').
      gsub(/<!--.*?-->/m, '').
      gsub(/<(img|hr|br).*?>/i, " ").
      gsub(/<(dd|a|h\d|p|small|b|i|blockquote|li)( [^>]*?)?>(.*?)<\/\1>/i, " \\3 ").
      gsub(/<\/?(dt|a|ul|ol)( [^>]+)?>/i, " ").
      gsub(/<[^>]+?>/, '').
      gsub(/\[\d+\]/, '').
      gsub(/&#8217;/,"'").
      # Only real entities (&name; / &#123;): a bare "&" must not swallow the
      # text up to the next semicolon ("AT&T rocks; ...").
      gsub(/&#?\w+;/,' ').
      gsub(/;/,' ')

    CGI.unescapeHTML(without_markup).lstrip.
      tr("\u2018\u2019\u02BC", "'").
      tr("\u201C\u201D\u02EE", '"').
      squeeze(" ")
  end

  # Removes URLs and bare domain names ("example.com/path"), including path,
  # query string and fragment. The text following a URL is left alone.
  def strip_urls
    gsub(/(?:https?:\/\/)?[\da-z.-]+\.[a-z.]{2,6}(?:[\/?#][\w.\/?&=#%~+:@-]*)?/i,"")
  end

  # HTML, Markdown/Liquid and surrounding whitespace removed.
  def strip_all
    strip_tags.strip_markdown.strip
  end

end
@@ -0,0 +1,3 @@
1
# Namespace that carries the gem's version number.
module Jtag
  # Version of the jtag gem; frozen so it cannot be modified at runtime.
  VERSION = '0.1.5'.freeze
end
metadata ADDED
@@ -0,0 +1,145 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jtag
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.5
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Brett Terpstra
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-08-26 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rdoc
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: aruba
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: gli
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - '='
68
+ - !ruby/object:Gem::Version
69
+ version: 2.7.0
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - '='
76
+ - !ruby/object:Gem::Version
77
+ version: 2.7.0
78
+ - !ruby/object:Gem::Dependency
79
+ name: json
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ description:
95
+ email: me@brettterpstra.com
96
+ executables:
97
+ - jtag
98
+ extensions: []
99
+ extra_rdoc_files:
100
+ - README.rdoc
101
+ - jtag.rdoc
102
+ files:
103
+ - bin/jtag
104
+ - lib/jtag/version.rb
105
+ - lib/jtag/config_files/blacklist.txt
106
+ - lib/jtag/config_files/config.yml
107
+ - lib/jtag/config_files/stopwords.txt
108
+ - lib/jtag/config_files/synonyms.yml
109
+ - lib/jtag/porter_stemming.rb
110
+ - lib/jtag/jekylltag.rb
111
+ - lib/jtag/string.rb
112
+ - lib/jtag.rb
113
+ - README.rdoc
114
+ - jtag.rdoc
115
+ homepage: http://brettterpstra.com
116
+ licenses: []
117
+ post_install_message:
118
+ rdoc_options:
119
+ - --title
120
+ - jtag
121
+ - --main
122
+ - README.rdoc
123
+ - -ri
124
+ require_paths:
125
+ - lib
126
+ - lib
127
+ required_ruby_version: !ruby/object:Gem::Requirement
128
+ none: false
129
+ requirements:
130
+ - - ! '>='
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ required_rubygems_version: !ruby/object:Gem::Requirement
134
+ none: false
135
+ requirements:
136
+ - - ! '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ requirements: []
140
+ rubyforge_project:
141
+ rubygems_version: 1.8.25
142
+ signing_key:
143
+ specification_version: 3
144
+ summary: Auto-tagging and tagging tools for Jekyll
145
+ test_files: []