jtag 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,18 @@
1
+ ---
2
+ tagging:
3
+ - tags
4
+ - tags.app
5
+ webdesign:
6
+ - web design
7
+ - web development
8
+ - javascript
9
+ - jquery
10
+ - safari
11
+ - chrome
12
+ - firefox
13
+ mountainlion:
14
+ - Mountain Lion
15
+ - OS X 10.8
16
+ - "10.8"
17
+ lion:
18
+ - "10.7"
@@ -0,0 +1,179 @@
1
# Tag helper for Jekyll posts: loads the known tags, suggests tags for a post
# based on word frequency, and rewrites the "tags" entry of a post's YAML
# front matter.
#
# State kept on the instance:
#   @support       - support directory (blacklist.txt, stopwords.txt, synonyms.yml)
#   @min_matches   - minimum number of occurrences before a word/tag is suggested
#   @tags_loc      - "host/path" of the JSON tag feed
#   @blacklist     - lowercase tags that must never be suggested
#   @skipwords     - stop words ignored by split_words
#   @tags          - Hash mapping a lookup key (stemmed tag or synonym) to a tag
#
# NOTE(review): exit_now! and exit_with_message are not defined in this class;
# presumably the command-line wrapper makes them available globally - verify.
# NOTE(review): YAML.load is applied to post files; newer Psych versions
# reportedly restrict the classes it will load (e.g. dates) - confirm against
# the Ruby versions this gem targets.
class JTag
  # support_dir - directory holding the blacklist, stop-word and synonym files.
  # config      - Hash read with the string keys "min_matches" (defaults to 2)
  #               and "tags_location".
  #
  # Performs an HTTP request (via get_tags) to load the current tag list.
  def initialize(support_dir, config)
    @support = support_dir
    @min_matches = config["min_matches"] || 2
    @tags_loc = config["tags_location"]
    @blacklistfile = File.join(@support, "blacklist.txt")
    @blacklist = read_word_list(@blacklistfile)
    @skipwords = read_word_list(File.join(support_dir, "stopwords.txt"))

    @tags = {}
    # get_tags returns false when the feed has no "tags" key.
    (get_tags || []).each do |tag|
      @tags[Text::PorterStemming.stem(tag).downcase] = tag if tag
    end
    # synonyms returns false when synonyms.yml is absent.
    (synonyms || {}).each do |synonym, tag|
      key = synonym.to_s.downcase
      @tags[key] = tag unless @blacklist.include?(key)
    end
  end

  # Fetches the tag feed over HTTP (port 80) and parses it as JSON.
  #
  # options - Hash with optional symbol keys:
  #           :blacklisted - when true, blacklisted tags are kept in the result
  #           :counts      - when true, return the feed's "tags_count" entry
  #
  # Returns the feed's "tags" array (or "tags_count" when :counts is set), or
  # false when the parsed feed has no "tags" key.
  # Raises RuntimeError when tags_location cannot be split into host and path,
  # and whatever Net::HTTP / JSON raise on transport or parse failures.
  def get_tags(options = {})
    blacklisted = options[:blacklisted] || false
    counts = options[:counts] || false

    # Tolerate a leading scheme; the original pattern treated "http:" as host.
    location = @tags_loc.to_s.sub(/\Ahttps?:\/\//i, "")
    parts = location.match(/^([^\/]+)(\/.*)/)
    raise "Invalid tags_location: #{@tags_loc.inspect}" unless parts
    host, path = parts[1], parts[2]

    body = Net::HTTP.new(host, 80).start do |conn|
      response = conn.request(Net::HTTP::Get.new(path))
      response.value # raises unless the response is a 2xx
      response.body
    end

    data = JSON.parse(body)
    return false unless data.is_a?(Hash) && data.key?("tags")
    return data["tags_count"] if counts

    unless blacklisted
      data["tags"].delete_if { |tag| !tag || @blacklist.include?(tag.downcase) }
    end
    data["tags"]
  end

  # Reads synonyms.yml from the support directory and inverts it.
  #
  # The file maps a tag to a list of synonyms; the result maps each synonym to
  # its tag. Returns false when the file does not exist.
  def synonyms
    path = File.join(@support, "synonyms.yml")
    return false unless File.exist?(path)

    # Block form closes the handle; the original left it open.
    syn = File.open(path) { |f| YAML.load(f) }
    compiled = {}
    return compiled unless syn.is_a?(Hash)

    syn.each do |tag, names|
      Array(names).each { |synonym| compiled[synonym] = tag }
    end
    compiled
  end

  # Splits a post file into its parsed YAML header and its body text.
  #
  # Returns [yaml, after] where after is the stripped body.
  # Raises RuntimeError when the file does not have a "---" delimited header.
  def split_post(file)
    input = IO.read(file)
    # Limit the split to three parts so a "---" line inside the body (for
    # example a Markdown horizontal rule) stays part of the body.
    post_parts = input.split(/^---\s*$/, 3)
    raise "File has improper YAML header" unless post_parts.length == 3
    after = post_parts[2].strip
    yaml = YAML.load(input)
    [yaml, after]
  end

  # Returns the "tags" entry of the post's YAML header, or [] when absent.
  # Raises RuntimeError when the file does not exist.
  def post_tags(file)
    raise "File #{file} does not exist" unless File.exist?(file)

    yaml = YAML.load(IO.read(file)) || false
    exit_now! "Invalid post header" unless yaml
    yaml["tags"] || []
  end

  # Replaces any of +tags+ found on the post with the single tag +merged+.
  #
  # Returns the post's new sorted, de-duplicated tag list, or false when the
  # post carries none of +tags+. The file itself is not modified.
  def merge_tags(tags, merged, file)
    current_tags = post_tags(file)
    remaining = current_tags - tags
    return false if remaining.length == current_tags.length

    (remaining << merged).uniq.sort
  end

  # Suggests tags for the post text +input+ (front matter plus body).
  #
  # Returns the suggested tags followed by the post's existing tags, without
  # duplicates.
  def suggest(input)
    yaml = YAML.load(input) || false
    exit_now! "Invalid post header" unless yaml
    current_tags = yaml["tags"] || []
    title = yaml["title"].to_s

    # Analyse title + body when the post has a proper header. The original
    # referenced an undefined local here, so its inline rescue always fell
    # back to analysing the raw input; that fallback is kept for headerless text.
    parts = input.split(/^---\s*$/, 3)
    text = parts.length == 3 ? "#{title}\n#{parts[2]}" : input
    @content = text.strip_all.strip_urls

    @words = split_words
    @auto_tags = []
    populate_auto_tags
    # uniq! returns nil when nothing was removed, so return the array itself.
    @auto_tags.concat(current_tags).uniq!
    @auto_tags
  end

  # Breaks @content into lowercase stemmed words, dropping words shorter than
  # four characters, words without a lowercase letter, and stop words that are
  # not themselves tag keys.
  def split_words
    tokens = @content.gsub(/([\/\\]|\s+)/, ' ').gsub(/[^A-Za-z0-9\s-]/, '').split(" ")
    tokens = tokens.reject { |word| word =~ /^[^a-z]+$/ || word.length < 4 }
    stems = tokens.map { |word| Text::PorterStemming.stem(word).downcase }
    stems.reject { |word| @skipwords.include?(word) && !@tags.key?(word) }
  end

  # Fills @auto_tags from @words (stem frequency) and @content (literal
  # occurrences of each tag key).
  def populate_auto_tags
    freqs = Hash.new(0)
    @words.each { |word| freqs[word] += 1 }
    freqs.delete_if { |_word, count| count < @min_matches }

    exit_with_message "No high frequency words", 1 if freqs.empty?

    # Most frequent first, ties broken alphabetically.
    freqs.sort_by { |word, count| [-count, word] }.each do |word, _count|
      next unless @tags.key?(word)
      next if @blacklist.include?(word)
      @auto_tags.push(@tags[word])
    end

    @tags.each do |key, tag|
      # Keys are plain text, not patterns: escape them so characters such as
      # "(" or "+" cannot break or alter the expression.
      occurrences = @content.scan(/\b#{Regexp.escape(key)}\b/i)
      @auto_tags.push(tag) if occurrences.count >= @min_matches
    end
  end

  # Adds +tags+ (lowercased) to the blacklist and rewrites the blacklist file.
  def blacklist(tags)
    tags.each { |word| @blacklist.push(word.downcase) }
    write_blacklist
  end

  # Removes +tags+ from the blacklist (case-insensitively, since entries are
  # stored lowercase) and rewrites the blacklist file.
  def unblacklist(tags)
    unwanted = tags.map { |word| word.downcase }
    @blacklist.delete_if { |entry| unwanted.include?(entry.downcase) }
    write_blacklist
  end

  # Rewrites +file+ with its header's "tags" entry set to +tags+.
  #
  # Returns true on success. Raises RuntimeError when the file is missing or
  # its header is unusable; other errors (I/O, YAML) propagate unchanged.
  def update_file_tags(file, tags)
    raise "File does not exist: #{file}" unless File.exist?(file)

    yaml, after = split_post(file)
    raise "Invalid post header" unless yaml.is_a?(Hash)
    yaml["tags"] = tags
    File.open(file, 'w') do |f|
      f.puts yaml.to_yaml
      f.puts "---"
      f.puts after
    end
    true
  end

  private

  # Returns the lines of +path+, or [] when the file does not exist.
  def read_word_list(path)
    File.exist?(path) ? IO.read(path).split("\n") : []
  end

  # Writes the sorted, de-duplicated blacklist to @blacklistfile.
  def write_blacklist
    File.open(@blacklistfile, 'w') do |f|
      f.puts @blacklist.uniq.sort.join("\n")
    end
  end
end
179
+
@@ -0,0 +1,171 @@
1
+ #
2
+ # This is the Porter Stemming algorithm, ported to Ruby from the
3
+ # version coded up in Perl. It's easy to follow against the rules
4
+ # in the original paper in:
5
+ #
6
+ # Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
7
+ # no. 3, pp 130-137,
8
+ #
9
+ # Taken from http://www.tartarus.org/~martin/PorterStemmer (Public Domain)
10
+ #
11
module Text # :nodoc:
  # Porter stemming algorithm (Porter, 1980, "An algorithm for suffix
  # stripping"). The single entry point is PorterStemming.stem; each step of
  # the algorithm lives in its own private helper.
  module PorterStemming

    # Step 2 suffix => replacement.
    STEP_2_LIST = {
      'ational' => 'ate', 'tional' => 'tion', 'enci' => 'ence', 'anci' => 'ance',
      'izer' => 'ize', 'bli' => 'ble',
      'alli' => 'al', 'entli' => 'ent', 'eli' => 'e', 'ousli' => 'ous',
      'ization' => 'ize', 'ation' => 'ate',
      'ator' => 'ate', 'alism' => 'al', 'iveness' => 'ive', 'fulness' => 'ful',
      'ousness' => 'ous', 'aliti' => 'al',
      'iviti' => 'ive', 'biliti' => 'ble', 'logi' => 'log'
    }

    # Step 3 suffix => replacement.
    STEP_3_LIST = {
      'icate' => 'ic', 'ative' => '', 'alize' => 'al', 'iciti' => 'ic',
      'ical' => 'ic', 'ful' => '', 'ness' => ''
    }

    # Suffixes handled by step 2 (keys of STEP_2_LIST).
    SUFFIX_1_REGEXP = /(
      ational |
      tional |
      enci |
      anci |
      izer |
      bli |
      alli |
      entli |
      eli |
      ousli |
      ization |
      ation |
      ator |
      alism |
      iveness |
      fulness |
      ousness |
      aliti |
      iviti |
      biliti |
      logi)$/x

    # Suffixes dropped outright by step 4.
    SUFFIX_2_REGEXP = /(
      al |
      ance |
      ence |
      er |
      ic |
      able |
      ible |
      ant |
      ement |
      ment |
      ent |
      ou |
      ism |
      ate |
      iti |
      ous |
      ive |
      ize)$/x

    C = "[^aeiou]" # consonant
    V = "[aeiouy]" # vowel
    CC = "#{C}(?>[^aeiouy]*)" # consonant sequence
    VV = "#{V}(?>[aeiou]*)" # vowel sequence

    MGR0 = /^(#{CC})?#{VV}#{CC}/o # [cc]vvcc... is m>0
    MEQ1 = /^(#{CC})?#{VV}#{CC}(#{VV})?$/o # [cc]vvcc[vv] is m=1
    MGR1 = /^(#{CC})?#{VV}#{CC}#{VV}#{CC}/o # [cc]vvccvvcc... is m>1
    VOWEL_IN_STEM = /^(#{CC})?#{V}/o # vowel in stem

    # Returns the stem of +word+ as a new String; the argument is not
    # modified. Words shorter than three characters are returned as-is.
    def self.stem(word)
      # Work on a private copy, coerced to a String.
      word = word.dup.to_str

      return word if word.length < 3

      # An initial y is masked as Y so the patterns never see it as a vowel.
      word[0] = 'Y' if word[0] == ?y

      word = strip_plural(word)
      word = strip_past_and_gerund(word)
      word = terminal_y_to_i(word)
      word = map_suffix(word, SUFFIX_1_REGEXP, STEP_2_LIST)
      word = map_suffix(word, /(icate|ative|alize|iciti|ical|ful|ness)$/, STEP_3_LIST)
      word = drop_residual_suffix(word)
      word = tidy_ending(word)

      # Undo the initial-y masking.
      word[0] = 'y' if word[0] == ?Y

      word
    end

    # Step 1a: "sses"/"ies" endings and a plain trailing "s".
    def self.strip_plural(word)
      m = /(ss|i)es$/.match(word) || /([^s])s$/.match(word)
      m ? m.pre_match + m[1] : word
    end

    # Step 1b: "eed", "ed" and "ing" endings, plus the follow-up repairs.
    def self.strip_past_and_gerund(word)
      if (m = /eed$/.match(word))
        return (m.pre_match =~ MGR0) ? word.chop : word
      end

      m = /(ed|ing)$/.match(word)
      return word unless m

      stem = m.pre_match
      return word unless stem =~ VOWEL_IN_STEM

      if stem =~ /(at|bl|iz)$/
        stem + "e"
      elsif stem =~ /([^aeiouylsz])\1$/
        stem.chop
      elsif cvc?(stem)
        stem + "e"
      else
        stem
      end
    end

    # Step 1c: a final "y" becomes "i" when the stem contains a vowel.
    def self.terminal_y_to_i(word)
      m = /y$/.match(word)
      return word unless m && m.pre_match =~ VOWEL_IN_STEM
      m.pre_match + "i"
    end

    # Steps 2 and 3: swap the suffix captured by +pattern+ for its entry in
    # +replacements+ when the remaining stem has measure > 0.
    def self.map_suffix(word, pattern, replacements)
      m = pattern.match(word)
      return word unless m && m.pre_match =~ MGR0
      m.pre_match + replacements[m[1]]
    end

    # Step 4: drop a residual suffix when the remaining stem has measure > 1.
    def self.drop_residual_suffix(word)
      candidate = nil
      if (m = SUFFIX_2_REGEXP.match(word))
        candidate = m.pre_match
      elsif (m = /(s|t)(ion)$/.match(word))
        candidate = m.pre_match + m[1]
      end
      (candidate && candidate =~ MGR1) ? candidate : word
    end

    # Step 5: remove a final "e" where allowed, then reduce a final "ll".
    def self.tidy_ending(word)
      if (m = /e$/.match(word))
        stem = m.pre_match
        word = stem if stem =~ MGR1 || (stem =~ MEQ1 && !cvc?(stem))
      end
      word = word.chop if word =~ /ll$/ && word =~ MGR1
      word
    end

    # True when +stem+ is consonant-sequence, vowel, then one consonant other
    # than w, x or y.
    def self.cvc?(stem)
      !!(stem =~ /^#{CC}#{V}[^aeiouwxy]$/o)
    end

    private_class_method :strip_plural, :strip_past_and_gerund, :terminal_y_to_i,
                         :map_suffix, :drop_residual_suffix, :tidy_ending, :cvc?

  end
end
@@ -0,0 +1,47 @@
1
# Text clean-up helpers added to String for preparing post content before
# word analysis.
class String
  # Converts camel case to lowercase words: "WikiLink" becomes "wiki link".
  # A string made only of capital letters is simply downcased.
  def break_camel
    return downcase if match(/\A[A-Z]+\z/)
    gsub(/([A-Z]+)([A-Z][a-z])/, '\1 \2').
    gsub(/([a-z])([A-Z])/, '\1 \2').
    downcase
  end

  # Returns a copy with Markdown and Liquid markup removed: Liquid tags,
  # footnotes, reference definitions, images, link syntax (the link text is
  # kept), header markers, emphasis, code fences/spans and horizontal rules.
  # Runs of blank lines are collapsed to a single blank line.
  def strip_markdown
    # strip all Markdown and Liquid tags
    gsub(/\{%.*?%\}/,'').
    gsub(/\[\^.+?\](\: .*?$)?/,'').
    gsub(/\s{0,2}\[.*?\]: .*?$/,'').
    gsub(/\!\[.*?\][\[\(].*?[\]\)]/,"").
    gsub(/\[(.*?)\][\[\(].*?[\]\)]/,"\\1").
    gsub(/^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$/,'').
    gsub(/^\#{1,6}\s*/,'').
    gsub(/(\*{1,2})(\S.*?\S)\1/,"\\2").
    gsub(/(`{3,})(.*?)\1/m,"\\2").
    gsub(/^-{3,}\s*$/,"").
    gsub(/`(.+)`/,"\\1").
    gsub(/\n{2,}/,"\n\n")
  end

  # Returns a copy with HTML removed: script/style/pre/code/figure elements
  # are dropped with their content, other tags are removed while their text
  # is kept, entities are replaced, and typographic quotes are converted to
  # ASCII quotes. Leading whitespace is stripped and runs of spaces squeezed.
  def strip_tags
    return CGI.unescapeHTML(
      gsub(/<(script|style|pre|code|figure).*?>.*?<\/\1>/im, '').
      gsub(/<!--.*?-->/m, '').
      gsub(/<(img|hr|br).*?>/i, " ").
      gsub(/<(dd|a|h\d|p|small|b|i|blockquote|li)( [^>]*?)?>(.*?)<\/\1>/i, " \\3 ").
      gsub(/<\/?(dt|a|ul|ol)( [^>]+)?>/i, " ").
      gsub(/<[^>]+?>/, '').
      gsub(/\[\d+\]/, '').
      gsub(/&#8217;/,"'").gsub(/&.*?;/,' ').gsub(/;/,' ')
    ).lstrip.
      gsub("\xE2\x80\x98","'").
      gsub("\xE2\x80\x99","'").
      gsub("\xCA\xBC","'").
      gsub("\xE2\x80\x9C",'"').
      gsub("\xE2\x80\x9D",'"').
      gsub("\xCB\xAE",'"').
      squeeze(" ")
  end

  # Returns a copy with URL-like tokens (optional http/https scheme, dotted
  # host, optional path) removed.
  #
  # The path portion deliberately excludes spaces: the previous pattern
  # allowed them, so everything after a URL up to the next punctuation mark
  # was deleted along with it, and its nested quantifier invited heavy
  # backtracking.
  def strip_urls
    gsub(/(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w\.-]*)\/?/i,"")
  end

  # Applies strip_tags, then strip_markdown, then trims surrounding whitespace.
  def strip_all
    strip_tags.strip_markdown.strip
  end

end
@@ -0,0 +1,3 @@
1
# Namespace carrying the gem's release number.
module Jtag
  # Version string of the jtag gem.
  VERSION = "0.1.5"
end
metadata ADDED
@@ -0,0 +1,145 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jtag
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.5
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Brett Terpstra
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-08-26 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rdoc
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: aruba
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: gli
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - '='
68
+ - !ruby/object:Gem::Version
69
+ version: 2.7.0
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - '='
76
+ - !ruby/object:Gem::Version
77
+ version: 2.7.0
78
+ - !ruby/object:Gem::Dependency
79
+ name: json
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ description:
95
+ email: me@brettterpstra.com
96
+ executables:
97
+ - jtag
98
+ extensions: []
99
+ extra_rdoc_files:
100
+ - README.rdoc
101
+ - jtag.rdoc
102
+ files:
103
+ - bin/jtag
104
+ - lib/jtag/version.rb
105
+ - lib/jtag/config_files/blacklist.txt
106
+ - lib/jtag/config_files/config.yml
107
+ - lib/jtag/config_files/stopwords.txt
108
+ - lib/jtag/config_files/synonyms.yml
109
+ - lib/jtag/porter_stemming.rb
110
+ - lib/jtag/jekylltag.rb
111
+ - lib/jtag/string.rb
112
+ - lib/jtag.rb
113
+ - README.rdoc
114
+ - jtag.rdoc
115
+ homepage: http://brettterpstra.com
116
+ licenses: []
117
+ post_install_message:
118
+ rdoc_options:
119
+ - --title
120
+ - jtag
121
+ - --main
122
+ - README.rdoc
123
+ - -ri
124
+ require_paths:
125
+ - lib
126
+ - lib
127
+ required_ruby_version: !ruby/object:Gem::Requirement
128
+ none: false
129
+ requirements:
130
+ - - ! '>='
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ required_rubygems_version: !ruby/object:Gem::Requirement
134
+ none: false
135
+ requirements:
136
+ - - ! '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ requirements: []
140
+ rubyforge_project:
141
+ rubygems_version: 1.8.25
142
+ signing_key:
143
+ specification_version: 3
144
+ summary: Auto-tagging and tagging tools for Jekyll
145
+ test_files: []