rack-seo 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +63 -0
- data/.rspec +1 -0
- data/.rvmrc +38 -0
- data/Gemfile +14 -0
- data/LICENSE.txt +20 -0
- data/README.md +145 -0
- data/Rakefile +1 -0
- data/config/rack_seo.default.yml +5 -0
- data/generators/rack_seo.sample.yml +32 -0
- data/lib/rack-seo/base.rb +57 -0
- data/lib/rack-seo/dispatcher.rb +19 -0
- data/lib/rack-seo/document.rb +107 -0
- data/lib/rack-seo/sanitize.rb +15 -0
- data/lib/rack-seo/summarizer.rb +40 -0
- data/lib/rack-seo/title_formatter.rb +9 -0
- data/lib/rack-seo/version.rb +5 -0
- data/lib/rack-seo.rb +16 -0
- data/lib/tasks/setup.rake +6 -0
- data/rack-seo.gemspec +24 -0
- data/spec/configuration_spec.rb +97 -0
- data/spec/fixtures/complex.html +437 -0
- data/spec/fixtures/simple.html +10 -0
- data/spec/rack-seo_spec.rb +107 -0
- data/spec/sample_configs/custom_paths.yml +16 -0
- data/spec/sample_configs/happy.yml +5 -0
- data/spec/sample_configs/sad.yml +5 -0
- data/spec/spec_helper.rb +45 -0
- metadata +104 -0
data/.gitignore
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
# rcov generated
|
2
|
+
coverage
|
3
|
+
coverage.data
|
4
|
+
|
5
|
+
# rdoc generated
|
6
|
+
rdoc
|
7
|
+
|
8
|
+
# yard generated
|
9
|
+
doc
|
10
|
+
.yardoc
|
11
|
+
|
12
|
+
# bundler
|
13
|
+
.bundle
|
14
|
+
bin/
|
15
|
+
|
16
|
+
# Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore:
|
17
|
+
#
|
18
|
+
# * Create a file at ~/.gitignore
|
19
|
+
# * Include files you want ignored
|
20
|
+
# * Run: git config --global core.excludesfile ~/.gitignore
|
21
|
+
#
|
22
|
+
# After doing this, these files will be ignored in all your git projects,
|
23
|
+
# saving you from having to 'pollute' every project you touch with them
|
24
|
+
#
|
25
|
+
# Not sure what needs to be ignored for particular editors/OSes? Here are some ideas to get you started. (Remember to remove the leading # of the line)
|
26
|
+
#
|
27
|
+
# For MacOS:
|
28
|
+
#
|
29
|
+
#.DS_Store
|
30
|
+
|
31
|
+
*.gem
|
32
|
+
*.rbc
|
33
|
+
.bundle
|
34
|
+
.config
|
35
|
+
.yardoc
|
36
|
+
Gemfile.lock
|
37
|
+
InstalledFiles
|
38
|
+
_yardoc
|
39
|
+
coverage
|
40
|
+
doc/
|
41
|
+
lib/bundler/man
|
42
|
+
spec/reports
|
43
|
+
test/tmp
|
44
|
+
test/version_tmp
|
45
|
+
tmp
|
46
|
+
|
47
|
+
# For TextMate
|
48
|
+
*.tmproj
|
49
|
+
tmtags
|
50
|
+
|
51
|
+
# For emacs:
|
52
|
+
#*~
|
53
|
+
#\#*
|
54
|
+
#.\#*
|
55
|
+
|
56
|
+
# For vim:
|
57
|
+
*.swp
|
58
|
+
|
59
|
+
# For redcar:
|
60
|
+
#.redcar
|
61
|
+
|
62
|
+
# For rubinius:
|
63
|
+
#*.rbc
|
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/.rvmrc
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
#!/usr/bin/env bash
|
2
|
+
|
3
|
+
# This is an RVM Project .rvmrc file, used to automatically load the ruby
|
4
|
+
# development environment upon cd'ing into the directory
|
5
|
+
|
6
|
+
# First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
|
7
|
+
# Only full ruby name is supported here, for short names use:
|
8
|
+
# echo "rvm use 1.9.3" > .rvmrc
|
9
|
+
environment_id="ruby-1.9.3-p194@rack-seo"
|
10
|
+
|
11
|
+
# Uncomment the following lines if you want to verify rvm version per project
|
12
|
+
# rvmrc_rvm_version="1.18.3 ()" # 1.10.1 seems to be a safe start
|
13
|
+
# eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
|
14
|
+
# echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
|
15
|
+
# return 1
|
16
|
+
# }
|
17
|
+
|
18
|
+
# First we attempt to load the desired environment directly from the environment
|
19
|
+
# file. This is very fast and efficient compared to running through the entire
|
20
|
+
# CLI and selector. If you want feedback on which environment was used then
|
21
|
+
# insert the word 'use' after --create as this triggers verbose mode.
|
22
|
+
if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
|
23
|
+
&& -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
|
24
|
+
then
|
25
|
+
\. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
|
26
|
+
[[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
|
27
|
+
\. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
|
28
|
+
if [[ $- == *i* ]] # check for interactive shells
|
29
|
+
then echo "Using: $(tput setaf 2)$GEM_HOME$(tput sgr0)" # show the user the ruby and gemset they are using in green
|
30
|
+
else echo "Using: $GEM_HOME" # don't use colors in non-interactive shells
|
31
|
+
fi
|
32
|
+
else
|
33
|
+
# If the environment file has not yet been created, use the RVM CLI to select.
|
34
|
+
rvm --create use "$environment_id" || {
|
35
|
+
echo "Failed to create RVM environment '${environment_id}'."
|
36
|
+
return 1
|
37
|
+
}
|
38
|
+
fi
|
data/Gemfile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
gemspec
|
3
|
+
|
4
|
+
# Add dependencies to develop your gem here.
|
5
|
+
# Include everything needed to run rake, tests, features, etc.
|
6
|
+
group :development do
|
7
|
+
gem "rack-test", :require => "rack/test"
|
8
|
+
gem "pry"
|
9
|
+
gem "rspec", "~> 2.8.0"
|
10
|
+
gem "rdoc", "~> 3.12"
|
11
|
+
gem "bundler"
|
12
|
+
gem "jeweler", "~> 1.8.4"
|
13
|
+
gem "simplecov", :require => false
|
14
|
+
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2013 Xavier Riley
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
# Rack SEO
|
2
|
+
|
3
|
+
Rack SEO takes a page, analyses the content and provides relevant meta tags based on the words in the page.
|
4
|
+
It's easy to configure (via a YAML file) so you can control all your SEO
|
5
|
+
meta tags from one place. Works for any
|
6
|
+
existing Rack application (including Rails, Sinatra, Padrino etc.)
|
7
|
+
|
8
|
+
### Dear God, why?
|
9
|
+
|
10
|
+
SEO tags are a common client request and it seems like a lot of
|
11
|
+
time and effort is wasted on sub-par solutions. Often, these tags
|
12
|
+
are based on content that already appears in the page and this gem helps to
|
13
|
+
streamline that process. Also, it gets the implementation out of your app logic
|
14
|
+
so you can concentrate on doing more interesting stuff whilst keeping our search bot overlords happy.
|
15
|
+
|
16
|
+
If that still doesn't convince you, that's fine. My only other
|
17
|
+
reason was that my New Year's resolutions for 2013 included `write a gem`
|
18
|
+
and `write some Rack Middleware` so here it is.
|
19
|
+
|
20
|
+
## Features
|
21
|
+
|
22
|
+
* Provides keyword relevant title, meta-description and meta-keyword tags for every page
|
23
|
+
* Use CSS selectors to pull any content into the title tag
|
24
|
+
* Use CSS selectors to specify where to look for the meta description and keyword content e.g. "#intro"
|
25
|
+
* Fully configurable for each path in your app, with wildcard matching using regular expressions
|
26
|
+
|
27
|
+
## Installation
|
28
|
+
|
29
|
+
```bash
|
30
|
+
#Assuming Mac OSX and Homebrew
|
31
|
+
brew update
|
32
|
+
brew install libxml2 # (you might need to brew link libxml2 as well)
|
33
|
+
brew install glib
|
34
|
+
|
35
|
+
#Follow instructions to install the summarize gem
|
36
|
+
#Don't just gem install as it's out of date on Rubygems
|
37
|
+
git clone https://github.com/ssoper/summarize.git
|
38
|
+
cd summarize
|
39
|
+
rake build
|
40
|
+
gem build summarize.gemspec
|
41
|
+
gem install summarize-1.0.4.gem
|
42
|
+
|
43
|
+
gem install rack-seo
|
44
|
+
```
|
45
|
+
|
46
|
+
## Usage
|
47
|
+
|
48
|
+
In your Gemfile:
|
49
|
+
```
|
50
|
+
gem 'rack-seo'
|
51
|
+
```
|
52
|
+
|
53
|
+
then run `bundle install`
|
54
|
+
|
55
|
+
and in your config.ru:
|
56
|
+
```
|
57
|
+
use Rack::RackSeo
|
58
|
+
```
|
59
|
+
|
60
|
+
or to specify your own config file
|
61
|
+
```
|
62
|
+
use Rack::RackSeo, :config => "/path/to/config/rack_seo.yml"
|
63
|
+
```
|
64
|
+
|
65
|
+
## The config file
|
66
|
+
|
67
|
+
You can put the YAML config wherever you like but I would suggest
|
68
|
+
`config/rack_seo.yml` for convention. The format is as follows:
|
69
|
+
|
70
|
+
```yaml
|
71
|
+
---
|
72
|
+
# default is the fallback for any paths that you have not specified
|
73
|
+
# explicitly.
|
74
|
+
default:
|
75
|
+
# title_format is a ruby string which parses out anything between {{
|
76
|
+
# and }} as a CSS selector (using Nokogiri) and pulls out the inner_text
|
77
|
+
title_format: "{{h1}} - Acme Ltd"
|
78
|
+
# meta_description_selector lets you specify where to pull the text
|
79
|
+
# content from to extract the summary text. The default is #content,
|
80
|
+
# falling back to the <body> tag if that isn't present. You can
|
81
|
+
# specify your own below. It pulls out the inner text and should only
|
82
|
+
# match one div or item.
|
83
|
+
meta_description_selector: "#my_juicy_keyword_rich_summary_div"
|
84
|
+
# Same as the description selector, but automatically generates a
|
85
|
+
# comma separated list from the content provided
|
86
|
+
meta_keywords_selector: "#my_tag_stuffed_p_tag"
|
87
|
+
# The custom key (optional) contains any paths you'd like to specify.
|
88
|
+
# These are tested against the current path, longest first to match the
|
89
|
+
# most specific by default.
|
90
|
+
custom:
|
91
|
+
-
|
92
|
+
matcher: '/blog'
|
93
|
+
title_format: "The Acme Company Blog - {{#content h1}}"
|
94
|
+
meta_description_selector: "#post_content"
|
95
|
+
meta_keywords_selector: "#comments"
|
96
|
+
-
|
97
|
+
matcher: '/contact-us'
|
98
|
+
# Plain old strings are fine too
|
99
|
+
title_format: "How to contact us about faulty anvils"
|
100
|
+
# You can skip the other selectors if you're happy with the
|
101
|
+
# #content/<body> defaults
|
102
|
+
-
|
103
|
+
# Matching wildcards works through regular expressions, though
|
104
|
+
# it's not pretty
|
105
|
+
matcher: !ruby/regexp '/\/news\/.*/'
|
106
|
+
title_format: "{{.article_body h3}} - Acme News"
|
107
|
+
```
|
108
|
+
|
109
|
+
## Caveats
|
110
|
+
|
111
|
+
* Processes on every request, so be sure to use caching in production
|
112
|
+
* Uses the `summarize` gem, which is a wrapper around ["The Open Text
|
113
|
+
Summarizer"](http://libots.sourceforge.net/). This has dependencies of
|
114
|
+
its own (see installation) so will probably not work out of the box on
|
115
|
+
Heroku.
|
116
|
+
|
117
|
+
## Credits
|
118
|
+
|
119
|
+
I originally did a proof of concept for this using the [Rack
|
120
|
+
Pagespeed](http://rack-pagespeed.heroku.com/)
|
121
|
+
middleware so thanks to @julio_ody for his work on that.
|
122
|
+
|
123
|
+
The clever stuff (text summarization and keyword extraction) is all
|
124
|
+
handled by the summarize gem at the moment so credit to @ssoper and
|
125
|
+
LibOTS for their work. Other summarizers (maybe even in pure Ruby) are
|
126
|
+
a focus going forward.
|
127
|
+
|
128
|
+
## Contributing to Rack SEO
|
129
|
+
|
130
|
+
Contributors are very welcome! Use Github issues for feature requests and other suggestions/improvements.
|
131
|
+
This project uses RSpec for tests.
|
132
|
+
|
133
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
134
|
+
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
|
135
|
+
* Fork the project.
|
136
|
+
* Start a feature/bugfix branch.
|
137
|
+
* Commit and push until you are happy with your contribution.
|
138
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
139
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
|
140
|
+
|
141
|
+
## Copyright
|
142
|
+
|
143
|
+
Copyright (c) 2013 Xavier Riley. See LICENSE.txt for
|
144
|
+
further details.
|
145
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,32 @@
|
|
1
|
+
---
|
2
|
+
# default is the fallback for any paths that you have not specified
|
3
|
+
# explicitly.
|
4
|
+
default:
|
5
|
+
# title_format is a ruby string which parses out anything between {{
|
6
|
+
# and }} as a CSS selector (using Nokogiri)
|
7
|
+
title_format: "{{h1}} - Acme Ltd"
|
8
|
+
# meta_description_selector lets you specify where to pull the text
|
9
|
+
# content from to extract the summary text. The default is #content,
|
10
|
+
# falling back to the <body> tag if that isn't present. You can
|
11
|
+
# specify your own below. It pulls out the inner text and should only
|
12
|
+
# match one div or item.
|
13
|
+
meta_description_selector: "#my_juicy_keyword_rich_summary_div"
|
14
|
+
# Same as the description selector, but automatically generates a
|
15
|
+
# comma separated list from the content provided
|
16
|
+
meta_keywords_selector: "#my_tag_stuffed_p_tag"
|
17
|
+
# The custom key (optional) contains any paths you'd like to specify.
|
18
|
+
# These are tested against the current path, longest first to match the
|
19
|
+
# most specific by default.
|
20
|
+
custom:
|
21
|
+
-
|
22
|
+
matcher: '/blog'
|
23
|
+
title_format: "The Acme Company Blog - {{#content h1}}"
|
24
|
+
meta_description_selector: "#post_content"
|
25
|
+
meta_keywords_selector: "#comments"
|
26
|
+
-
|
27
|
+
matcher: '/contact-us'
|
28
|
+
# Plain old strings are fine too
|
29
|
+
title_format: "How to contact us about faulty anvils"
|
30
|
+
# You can skip the other selectors if you're happy with the
|
31
|
+
# #content/<body> defaults
|
32
|
+
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module Rack
  module RackSeo
    # Rack middleware that rewrites the <title>, meta description and meta
    # keywords of HTML responses according to a YAML configuration.
    #
    # Responses whose Content-Type does not mention "html" are passed through
    # untouched.
    class Base
      # Fallback configuration file, expressed relative to a project root.
      DEFAULT_CONFIG = "config/rack_seo.default.yml"

      attr_accessor :config
      attr_accessor :current_path
      attr_accessor :dispatcher

      # app     - the downstream Rack application.
      # options - Hash; :config may hold the path of a YAML configuration
      #           file. Optional, so the middleware can be mounted without
      #           arguments.
      #
      # Raises Errno::ENOENT when the configuration file cannot be read.
      def initialize(app, options = {}, &block)
        @app = app
        options ||= {}
        # The configuration is trusted, project-owned YAML; it may contain
        # `!ruby/regexp` matchers, which is why a plain YAML.load is used.
        @config = YAML.load(IO.read(options[:config] || default_config_path))
      end

      # Runs the downstream app and, for HTML responses, rewrites the SEO tags.
      #
      # env - the Rack environment Hash.
      #
      # Returns the usual Rack triplet [status, headers, body].
      def call(env)
        status, headers, response = @app.call(env)
        return [status, headers, response] unless headers['Content-Type'] =~ /html/

        # Buffer the whole body so it can be parsed as one document.
        body = ""
        response.each { |part| body << part }
        # The original body is replaced below, so release it here.
        response.close if response.respond_to?(:close)

        # Nothing to rewrite in an empty body; Document's setup dereferences
        # document.root, which presumably is absent for empty input.
        return [status, headers, [body]] if body.strip.empty?

        document = Rack::RackSeo::Document.new(body)
        current_path = env['PATH_INFO'] || '/'
        execute!(document, current_path)

        body = document.to_html
        # Content-Length counts bytes, not characters.
        headers['Content-Length'] = body.bytesize.to_s if headers['Content-Length']
        [status, headers, [body]]
      end

      # Resolves the settings for current_path and applies all three tags to
      # document (mutating it in place).
      def execute!(document, current_path = '/')
        @dispatcher = RackSeo::Dispatcher.new(@config, current_path)
        set_meta_title(document, @dispatcher.title_format)
        set_meta_description(document, @dispatcher.description_selector)
        set_meta_keywords(document, @dispatcher.keywords_selector)
      end

      # Formats, sanitizes and stores the page title.
      def set_meta_title(document, title_format)
        content = Rack::RackSeo::TitleFormatter.parse_meta_title(document, title_format)
        content = Rack::RackSeo::Sanitize.sanitize_meta_title(content)
        document.title_content = content
      end

      # Stores the description extracted from the elements matching
      # description_selector.
      def set_meta_description(document, description_selector)
        content = Rack::RackSeo::Summarizer.extract_description(document, description_selector)
        document.description_content = content
      end

      # Stores the keywords extracted from the elements matching
      # keywords_selector.
      def set_meta_keywords(document, keywords_selector)
        content = Rack::RackSeo::Summarizer.extract_keywords(document, keywords_selector)
        document.keywords_content = content
      end

      private

      # Prefers a default config in the current working directory (the
      # original behaviour) and otherwise falls back to the copy that sits
      # three levels above this file, i.e. the one shipped next to lib/.
      def default_config_path
        return DEFAULT_CONFIG if File.exist?(DEFAULT_CONFIG)
        File.expand_path("../../../#{DEFAULT_CONFIG}", __FILE__)
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Rack
  module RackSeo
    # Picks the SEO settings that apply to a request path.
    #
    # Entries under the "custom" key are tried longest matcher first, so the
    # most specific path wins regardless of its position in the file; entries
    # of equal length keep their configured order. Any setting the matching
    # entry does not provide falls back to the "default" section.
    class Dispatcher
      attr_accessor :title_format
      attr_accessor :description_selector
      attr_accessor :keywords_selector

      # config       - Hash parsed from the YAML configuration; may contain
      #                "default" (Hash) and "custom" (Array of Hashes).
      # current_path - request path to match against each entry's "matcher".
      def initialize(config, current_path)
        config ||= {}
        defaults = config["default"] || {}
        overrides = find_custom_entry(config["custom"], current_path.to_s) || {}

        @title_format = overrides["title_format"] || defaults["title_format"]
        @description_selector = overrides["meta_description_selector"] || defaults["meta_description_selector"]
        @keywords_selector = overrides["meta_keywords_selector"] || defaults["meta_keywords_selector"]
      end

      private

      # Returns the first entry (longest matcher first) that matches path,
      # or nil when there is none.
      def find_custom_entry(custom_entries, path)
        return nil unless custom_entries.respond_to?(:each_with_index)

        # The index is part of the sort key to keep ties in file order.
        ranked = custom_entries.each_with_index.sort_by do |entry, position|
          [-matcher_length(entry["matcher"]), position]
        end
        found = ranked.find { |entry, _position| matches?(entry["matcher"], path) }
        found && found.first
      end

      # String matchers match as substrings, Regexp matchers by pattern;
      # anything else never matches.
      def matches?(matcher, path)
        case matcher
        when String then path.include?(matcher)
        when Regexp then !(path =~ matcher).nil?
        else false
        end
      end

      # Specificity measure: string length, or pattern source length for a
      # Regexp.
      def matcher_length(matcher)
        case matcher
        when String then matcher.length
        when Regexp then matcher.source.length
        else 0
        end
      end
    end
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# Wrapper around the Nokogiri-parsed page with convenience accessors for the
# SEO tags. Construction guarantees that <html>, <head>, <title> and the
# description/keywords <meta> tags (each with a content attribute) exist.
class Rack::RackSeo::Document < Nokogiri::HTML::Document
  # NOTE(review): none of these accessors is used by the code in this class;
  # `title` may also shadow a method of the Nokogiri superclass - confirm.
  attr_accessor :title, :desc, :keywords

  class << self
    # Parses the first argument as HTML and returns the document with all
    # SEO tags in place. Further arguments are ignored.
    def new(*args)
      doc = parse(args.first)
      setup_meta_tags(doc)
    end

    # Same as the superclass parser, but defaults to UTF-8 and the RECOVER
    # parse option.
    def parse(string_or_io, url = nil, encoding = 'utf-8', options = Nokogiri::XML::ParseOptions::RECOVER)
      super(string_or_io, url, encoding, options)
    end
  end

  # Text of the <title> tag.
  def title_content
    self.title_tag.text
  end

  def title_content=(content)
    title_tag.content = content
  end

  def title_tag
    self.at('title')
  end

  # Writes the description; a no-op when the meta tag is missing.
  def description_content=(content)
    self.description_tag['content'] = content unless description_tag.nil?
  end

  # Content of the description meta tag, or nil when the tag is missing.
  def description_content
    self.description_tag.attr('content') unless description_tag.nil?
  end

  def description_tag
    self.at_css("meta[name='description']")
  end

  # Writes the keywords; a no-op when the meta tag is missing.
  def keywords_content=(content)
    keywords_tag['content'] = content unless keywords_tag.nil?
  end

  # Content of the keywords meta tag, or nil when the tag is missing.
  def keywords_content
    keywords_tag.attr('content') unless keywords_tag.nil?
  end

  def keywords_tag
    self.at_css("meta[name='keywords']")
  end

  class << self
    private

    # Creates every missing structural element, in dependency order, and
    # returns the document.
    def setup_meta_tags(document)
      create_html_root_node(document) unless document.root.name == "html"
      create_doc_head(document) unless document.at_css("head")
      create_meta_title(document) unless document.at_css("title")
      create_meta_desc(document) unless document.at_css("meta[name='description']")
      create_meta_desc_content(document) unless document.at_css("meta[name='description']")['content']
      create_meta_keywords(document) unless document.at_css("meta[name='keywords']")
      # The attribute that holds the keywords is `content`, same as for the
      # description tag.
      create_meta_keywords_content(document) unless document.at_css("meta[name='keywords']")['content']
      document
    end

    def create_html_root_node(document)
      document.root.wrap('<html></html>')
    end

    # Inserts <head> as the first child of the root element.
    def create_doc_head(document)
      doc_head = Nokogiri::XML::Element.new('head', document)
      first_child = document.root.children.first
      if first_child
        first_child.before doc_head
      else
        # A root without children has nothing to insert before.
        document.root.add_child doc_head
      end
    end

    def create_meta_title(document)
      meta_title = Nokogiri::XML::Element.new('title', document)
      document.at('head').add_child meta_title
    end

    # Adds the description tag right after the first child of <head>.
    def create_meta_desc(document)
      meta_desc = Nokogiri::XML::Element.new('meta', document)
      meta_desc['name'] = "description"
      meta_desc['content'] ||= ""
      document.at('head').children.first.after meta_desc
    end

    # Adds the keywords tag after the last child of <head>.
    def create_meta_keywords(document)
      meta_keywords = Nokogiri::XML::Element.new('meta', document)
      meta_keywords['name'] = "keywords"
      meta_keywords['content'] ||= ""
      document.at('head').children.last.after meta_keywords
    end

    def create_meta_desc_content(document)
      meta_desc = document.description_tag
      meta_desc['content'] ||= ""
    end

    def create_meta_keywords_content(document)
      meta_keywords = document.keywords_tag
      meta_keywords['content'] ||= ""
    end
  end

end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Rack
  module RackSeo
    # Normalizes text before it is written into the SEO tags.
    #
    # sanitize_meta_title is a module function; the other two are instance
    # methods meant to be mixed in.
    module Sanitize
      # Collapses every run of whitespace into a single space and trims the
      # ends. nil becomes "".
      def self.sanitize_meta_title(title)
        title.to_s.gsub(/\s+/, ' ').strip
      end

      # Collapses every run of whitespace (line breaks included) into a
      # single space and trims the ends. nil becomes "".
      #
      # Literal "|" characters are kept: the former /[\r|\n]+/ pass was a
      # character class that also matched the pipe and turned it into a space.
      def sanitize_meta_description(meta_description)
        meta_description.to_s.gsub(/\s+/, ' ').strip
      end

      # Turns a comma separated list into lower-case keywords with all
      # whitespace removed, dropping blank entries. nil becomes "".
      def sanitize_meta_keywords(keywords)
        keywords.to_s.split(",").map { |keyword|
          keyword.downcase.gsub(/\s+/, '')
        }.reject(&:empty?).join(',')
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Extracts description and keyword text from a parsed document, using the
# String#summarize extension for the actual summarization.
class Rack::RackSeo::Summarizer
  class << self
    include Rack::RackSeo::Sanitize

    # Sanitized summary of the text inside the elements matching selector.
    def extract_description(document, selector)
      sanitize_meta_description get_description(document, selector)
    end

    # Sanitized, comma separated keywords for the text inside the elements
    # matching selector.
    def extract_keywords(document, selector)
      sanitize_meta_keywords get_keywords(document, selector)
    end
  end

  # The helpers below stay public, as before: the bare `private` that used to
  # precede them never applied to `def self.` methods.

  # Summary (ratio 1) of the selected text, stripped.
  def self.get_description(document, selector)
    selected_text(document, selector).summarize(:ratio => 1).strip
  end

  # Last element of the :topics summarization result for the selected text.
  def self.get_keywords(document, selector)
    selected_text(document, selector).summarize(:topics => true).last
  end

  # Elements matching selector, or the <body> elements when the selector is
  # blank, is not valid CSS, or matches nothing.
  def self.get_selected_elements(document, selector)
    return document.css('body') if selector.nil? || selector.to_s.strip.empty?

    elements = begin
      document.css(selector)
    rescue Nokogiri::CSS::SyntaxError
      nil
    end

    if elements.nil? || elements.empty?
      document.css('body')
    else
      elements
    end
  end

  # Inner text of every selected element, joined with single spaces.
  def self.selected_text(document, selector)
    get_selected_elements(document, selector).map { |element|
      element.inner_text
    }.join(' ')
  end
  private_class_method :selected_text
end
|
data/lib/rack-seo.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'yaml'
|
3
|
+
require 'summarize'
|
4
|
+
require 'rack'
|
5
|
+
require 'rack-seo/version'
|
6
|
+
require 'rack-seo/base'
|
7
|
+
require 'rack-seo/dispatcher'
|
8
|
+
require 'rack-seo/document'
|
9
|
+
require 'rack-seo/sanitize'
|
10
|
+
require 'rack-seo/summarizer'
|
11
|
+
require 'rack-seo/title_formatter'
|
12
|
+
|
13
|
+
module Rack
  # Namespace for the middleware and its helpers.
  module RackSeo
    # Lets the namespace itself be mounted, as in `use Rack::RackSeo` or
    # `use Rack::RackSeo, :config => "path/to.yml"`, by building the actual
    # middleware, Rack::RackSeo::Base.
    #
    # app     - the downstream Rack application.
    # options - Hash forwarded to Base (see its :config option).
    #
    # Returns a Rack::RackSeo::Base instance.
    def self.new(app, options = {}, &block)
      Base.new(app, options, &block)
    end
  end
end
|
data/rack-seo.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
# Make lib/ loadable so the version constant can be read without the gem
# being installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'rack-seo/version'

Gem::Specification.new do |gem|
  gem.name          = "rack-seo"
  # NOTE(review): the library code lives under Rack::RackSeo; confirm that
  # rack-seo/version really defines Rack::Seo::VERSION.
  gem.version       = Rack::Seo::VERSION
  gem.homepage      = "http://github.com/xavriley/rack-seo"
  gem.license       = "MIT"
  gem.summary       = "Generate and manage meta tags on the fly using Rack Middleware"
  gem.description   = %q{Lets you extract sensible default content for meta tags using the markup from that page.}
  gem.email         = ["xavriley@github.com"]
  gem.authors       = ["Xavier Riley"]

  # Package exactly what git tracks.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # lib/rack-seo.rb requires 'rack' at load time, so it is a runtime
  # dependency as well.
  gem.add_dependency('rack')
  gem.add_dependency('nokogiri', ["~> 1.5.0"])
  gem.add_dependency('summarize', ["~> 1.0.3"])
end
|
24
|
+
|