perron 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +166 -7
- data/app/helpers/feeds_helper.rb +5 -0
- data/app/helpers/meta_tags_helper.rb +4 -6
- data/app/helpers/perron/markdown_helper.rb +5 -1
- data/lib/generators/perron/install_generator.rb +1 -1
- data/lib/generators/perron/templates/README.md.tt +16 -6
- data/lib/perron/configuration.rb +14 -5
- data/lib/perron/errors.rb +4 -0
- data/lib/perron/feeds.rb +38 -0
- data/lib/perron/html_processor/base.rb +15 -0
- data/lib/perron/html_processor/lazy_load_images.rb +13 -0
- data/lib/perron/html_processor/target_blank.rb +4 -8
- data/lib/perron/html_processor.rb +30 -8
- data/lib/perron/markdown.rb +2 -2
- data/lib/perron/metatags.rb +21 -13
- data/lib/perron/site/builder/feeds/json.rb +52 -0
- data/lib/perron/site/builder/feeds/rss.rb +55 -0
- data/lib/perron/site/builder/feeds.rb +41 -0
- data/lib/perron/site/builder/sitemap.rb +54 -0
- data/lib/perron/site/builder.rb +6 -0
- data/lib/perron/site/collection.rb +6 -1
- data/lib/perron/site/data/proxy.rb +17 -0
- data/lib/perron/site/data.rb +39 -73
- data/lib/perron/site/resource/configuration.rb +58 -0
- data/lib/perron/site/resource/related/stop_words.rb +34 -0
- data/lib/perron/site/resource/related.rb +99 -0
- data/lib/perron/site/resource.rb +7 -1
- data/lib/perron/site.rb +4 -2
- data/lib/perron/version.rb +1 -1
- data/lib/perron.rb +1 -0
- data/perron.gemspec +1 -1
- metadata +16 -4
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "json"
|
4
|
+
|
5
|
+
module Perron
  module Site
    class Builder
      class Feeds
        # Builds a JSON Feed (version 1.1) document for one collection.
        class Json
          # @param collection [#resources, #configuration] collection whose
          #   resources are listed in the feed
          def initialize(collection:)
            @collection = collection
            @configuration = Perron.configuration
          end

          # Serializes the feed as pretty-printed JSON.
          #
          # Members without a value (for example `description` when no site
          # description is configured, or `date_published` when a resource has
          # neither `published_at` nor `updated_at`) are left out instead of
          # being written as `null`.
          #
          # @return [String, nil] the JSON document, or nil when the collection
          #   has no resources to list
          def generate
            return nil if resources.empty?

            feed = Rails.application.routes.url_helpers.with_options(@configuration.default_url_options) do |url|
              {
                version: "https://jsonfeed.org/version/1.1",
                title: @configuration.site_name,
                home_page_url: @configuration.url,
                description: @configuration.site_description,
                items: resources.map { |resource| item_for(resource, url) }
              }.compact
            end

            JSON.pretty_generate(feed)
          end

          private

          # Builds the feed item for a single resource, dropping nil members.
          def item_for(resource, url)
            metadata = resource.metadata

            {
              id: resource.id,
              url: url.polymorphic_url(resource),
              date_published: (metadata.published_at || metadata.updated_at)&.iso8601,
              title: metadata.title,
              content_html: Perron::Markdown.render(resource.content)
            }.compact
          end

          # Resources to list: everything not opted out with `feed: false`,
          # newest first, capped at the collection's `feeds.json.max_items`.
          # Resources without any date are sorted as if dated "now".
          def resources
            @resources ||= @collection.resources
              .reject { |resource| resource.metadata.feed == false }
              .sort_by { |resource| resource.metadata.published_at || resource.metadata.updated_at || Time.current }
              .reverse
              .take(@collection.configuration.feeds.json.max_items)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "nokogiri"
|
4
|
+
|
5
|
+
module Perron
  module Site
    class Builder
      class Feeds
        # Builds an RSS 2.0 document for one collection.
        class Rss
          # @param collection [#resources, #configuration] collection whose
          #   resources are listed in the feed
          def initialize(collection:)
            @collection = collection
            @configuration = Perron.configuration
          end

          # Renders the feed.
          #
          # @return [String, nil] the XML document, or nil when the collection
          #   has no resources to list
          def generate
            return if resources.empty?

            Nokogiri::XML::Builder.new(encoding: "UTF-8") do |xml|
              xml.rss(:version => "2.0", "xmlns:atom" => "http://www.w3.org/2005/Atom") do
                xml.channel do
                  xml.title @configuration.site_name
                  xml.description @configuration.site_description
                  xml.link @configuration.url
                  xml.generator "Perron (#{Perron::VERSION})"

                  Rails.application.routes.url_helpers.with_options(@configuration.default_url_options) do |url|
                    resources.each { |resource| add_item(xml, resource, url) }
                  end
                end
              end
            end.to_xml
          end

          private

          # Appends one <item> for the resource to the builder.
          def add_item(xml, resource, url)
            published_at = resource.metadata.published_at || resource.metadata.updated_at

            xml.item do
              # `isPermaLink` is an attribute of <guid>, not <link>. The guid is
              # the resource id rather than a URL, so it is flagged as not
              # being a permalink.
              xml.guid resource.id, isPermaLink: "false"
              xml.link url.polymorphic_url(resource)
              # An empty <pubDate/> is not a valid date; skip it when unknown.
              xml.pubDate published_at.rfc822 if published_at
              xml.title resource.metadata.title
              xml.description { xml.cdata(Perron::Markdown.render(resource.content)) }
            end
          end

          # Resources to list: everything not opted out with `feed: false`,
          # newest first, capped at the collection's `feeds.rss.max_items`.
          # Resources without any date are sorted as if dated "now".
          def resources
            @resources ||= @collection.resources
              .reject { |resource| resource.metadata.feed == false }
              .sort_by { |resource| resource.metadata.published_at || resource.metadata.updated_at || Time.current }
              .reverse
              .take(@collection.configuration.feeds.rss.max_items)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "perron/site/builder/feeds/rss"
|
4
|
+
require "perron/site/builder/feeds/json"
|
5
|
+
|
6
|
+
module Perron
  module Site
    class Builder
      # Writes the enabled RSS and JSON feeds of every collection below the
      # build output path.
      class Feeds
        # @param output_path [#join] root directory of the build output
        def initialize(output_path)
          @output_path = output_path
        end

        # Generates each enabled feed (RSS first, then JSON) per collection.
        def generate
          Perron::Site.collections.each do |collection|
            feeds = collection.configuration.feeds

            {rss: Rss, json: Json}.each do |format, builder|
              options = feeds.public_send(format)
              next unless options.enabled

              create_file(at: options.path, with: builder.new(collection: collection).generate)
            end
          end
        end

        private

        # Writes `with` to `at` (relative to the output path), creating parent
        # directories as needed. Blank content is not written.
        def create_file(at:, with:)
          return if with.blank?

          destination = @output_path.join(at)
          FileUtils.mkdir_p(File.dirname(destination))
          File.write(destination, with)
        end
      end
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Perron
  module Site
    class Builder
      # Writes `sitemap.xml` listing the resources of every collection.
      class Sitemap
        # @param output_path [#join] root directory of the build output
        def initialize(output_path)
          @output_path = output_path
        end

        # Builds the sitemap and writes it to `sitemap.xml` in the output path.
        # Does nothing when the sitemap is disabled in the Perron configuration.
        def generate
          return unless Perron.configuration.sitemap.enabled

          xml = Nokogiri::XML::Builder.new(encoding: "UTF-8") do |builder|
            builder.urlset(xmlns: "http://www.sitemaps.org/schemas/sitemap/0.9") do
              Perron::Site.collections.each do |collection|
                add_urls_for(collection, with: builder)
              end
            end
          end.to_xml

          File.write(@output_path.join("sitemap.xml"), xml)
        end

        private

        # Appends a <url> entry per resource of the collection, skipping
        # collections configured with `sitemap.exclude = true` and resources
        # whose metadata sets `sitemap: false`.
        def add_urls_for(collection, with:)
          return if collection.configuration.sitemap.exclude == true

          Rails.application.routes.url_helpers.with_options(Perron.configuration.default_url_options) do |url|
            collection.resources.each do |resource|
              next if resource.metadata.sitemap == false

              with.url do
                with.loc (resource.slug == "/") ? url.root_url : url.polymorphic_url(resource)
                with.priority setting_for(:priority, resource, collection)
                with.changefreq setting_for(:change_frequency, resource, collection)
                with.lastmod last_modified_for(resource)
              end
            end
          end
        end

        # Resolves a sitemap setting: resource metadata (`sitemap_<name>`)
        # first, then the collection configuration, then the site-wide one.
        def setting_for(name, resource, collection)
          resource.metadata.public_send(:"sitemap_#{name}") ||
            collection.configuration.sitemap.public_send(name) ||
            Perron.configuration.sitemap.public_send(name)
        end

        # ISO 8601 timestamp for <lastmod>: the resource's `updated_at`, or the
        # current time when it is missing or not a date/time. The previous
        # `rescue` branch computed this fallback but never wrote it.
        def last_modified_for(resource)
          updated_at = resource.metadata.updated_at

          (updated_at.respond_to?(:iso8601) ? updated_at : Time.current).iso8601
        end
      end
    end
  end
end
|
data/lib/perron/site/builder.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "perron/site/builder/assets"
|
4
|
+
require "perron/site/builder/sitemap"
|
5
|
+
require "perron/site/builder/feeds"
|
4
6
|
require "perron/site/builder/public_files"
|
5
7
|
require "perron/site/builder/paths"
|
6
8
|
require "perron/site/builder/page"
|
@@ -15,6 +17,7 @@ module Perron
|
|
15
17
|
def build
|
16
18
|
if Perron.configuration.mode.standalone?
|
17
19
|
puts "🧹 Cleaning previous build…"
|
20
|
+
|
18
21
|
FileUtils.rm_rf(Dir.glob("#{@output_path}/*"))
|
19
22
|
|
20
23
|
Perron::Site::Builder::Assets.new.prepare
|
@@ -26,6 +29,9 @@ module Perron
|
|
26
29
|
|
27
30
|
paths.each { render_page(it) }
|
28
31
|
|
32
|
+
Perron::Site::Builder::Sitemap.new(@output_path).generate
|
33
|
+
Perron::Site::Builder::Feeds.new(@output_path).generate
|
34
|
+
|
29
35
|
puts "-" * 15
|
30
36
|
puts "✅ Build complete"
|
31
37
|
end
|
@@ -6,16 +6,21 @@ module Perron
|
|
6
6
|
|
7
7
|
def initialize(name)
|
8
8
|
@name = name
|
9
|
-
@collection_path = File.join(
|
9
|
+
@collection_path = File.join(Perron.configuration.input, name)
|
10
10
|
|
11
11
|
raise Errors::CollectionNotFoundError, "No such collection: #{name}" unless File.exist?(@collection_path) && File.directory?(@collection_path)
|
12
12
|
end
|
13
13
|
|
14
|
+
def configuration(resource_class = "Content::#{name.classify}".safe_constantize)
|
15
|
+
resource_class.configuration
|
16
|
+
end
|
17
|
+
|
14
18
|
def all(resource_class = "Content::#{name.classify}".safe_constantize)
|
15
19
|
@all ||= Dir.glob("#{@collection_path}/**/*.*").map do |file_path|
|
16
20
|
resource_class.new(file_path)
|
17
21
|
end.select(&:published?)
|
18
22
|
end
|
23
|
+
alias_method :resources, :all
|
19
24
|
|
20
25
|
def find(slug, resource_class = Resource)
|
21
26
|
resource = all(resource_class).find { it.slug == slug }
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Perron
  class Data
    # Turns any argument-less method call into a data lookup by name:
    # `proxy.features` is equivalent to `Perron::Data.new("features")`.
    class Proxy
      # @param method_name [Symbol] name of the data set to load
      # @raise [ArgumentError] when any argument is passed
      # @return [Perron::Data]
      def method_missing(method_name, *arguments, &block)
        # `arguments.any?` would let `nil`/`false` arguments slip through,
        # because it tests truthiness rather than presence.
        raise ArgumentError, "Data `#{method_name}` does not accept arguments" unless arguments.empty?

        Perron::Data.new(method_name.to_s)
      end

      # Every method name is treated as a potential data set.
      def respond_to_missing?(method_name, include_private = false)
        true
      end
    end
  end
end
|
data/lib/perron/site/data.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "erb"
|
4
|
+
require "singleton"
|
5
|
+
|
3
6
|
require "csv"
|
4
7
|
|
5
8
|
module Perron
|
@@ -29,25 +32,39 @@ module Perron
|
|
29
32
|
end
|
30
33
|
|
31
34
|
def records
|
32
|
-
content =
|
33
|
-
|
34
|
-
parser = PARSER_METHODS.fetch(extension) do
|
35
|
-
raise Errors::UnsupportedDataFormatError, "Unsupported data format: #{extension}"
|
36
|
-
end
|
37
|
-
|
38
|
-
data = send(parser, content)
|
35
|
+
content = rendered_from(@file_path)
|
36
|
+
data = parsed_from(content, @file_path)
|
39
37
|
|
40
38
|
unless data.is_a?(Array)
|
41
39
|
raise Errors::DataParseError, "Data in '#{@file_path}' must be an array of objects."
|
42
40
|
end
|
43
41
|
|
44
42
|
data.map { Item.new(it) }
|
43
|
+
end
|
44
|
+
|
45
|
+
def rendered_from(path)
|
46
|
+
raw_content = File.read(path)
|
47
|
+
|
48
|
+
render_erb(raw_content)
|
49
|
+
rescue NameError, ArgumentError, SyntaxError => error
|
50
|
+
raise Errors::DataParseError, "Failed to render ERB in `#{path}`: (#{error.class}) #{error.message}"
|
51
|
+
end
|
52
|
+
|
53
|
+
def parsed_from(content, path)
|
54
|
+
extension = File.extname(path)
|
55
|
+
parser_method = PARSER_METHODS.fetch(extension) do
|
56
|
+
raise Errors::UnsupportedDataFormatError, "Unsupported data format: #{extension}"
|
57
|
+
end
|
58
|
+
|
59
|
+
send(parser_method, content)
|
45
60
|
rescue Psych::SyntaxError, JSON::ParserError, CSV::MalformedCSVError => error
|
46
|
-
raise Errors::DataParseError, "Failed to parse
|
61
|
+
raise Errors::DataParseError, "Failed to parse data format in `#{path}`: (#{error.class}) #{error.message}"
|
47
62
|
end
|
48
63
|
|
64
|
+
def render_erb(content) = ERB.new(content).result(HelperContext.instance.get_binding)
|
65
|
+
|
49
66
|
def parse_yaml(content)
|
50
|
-
YAML.safe_load(content, permitted_classes: [Symbol], aliases: true)
|
67
|
+
YAML.safe_load(content, permitted_classes: [Symbol, Time], aliases: true)
|
51
68
|
end
|
52
69
|
|
53
70
|
def parse_json(content)
|
@@ -58,6 +75,19 @@ module Perron
|
|
58
75
|
CSV.new(content, headers: true, header_converters: :symbol).to_a.map(&:to_h)
|
59
76
|
end
|
60
77
|
|
78
|
+
class HelperContext
|
79
|
+
include Singleton
|
80
|
+
|
81
|
+
def initialize
|
82
|
+
self.class.include ActionView::Helpers::AssetUrlHelper
|
83
|
+
self.class.include ActionView::Helpers::DateHelper
|
84
|
+
self.class.include Rails.application.routes.url_helpers
|
85
|
+
end
|
86
|
+
|
87
|
+
def get_binding = binding
|
88
|
+
end
|
89
|
+
private_constant :HelperContext
|
90
|
+
|
61
91
|
class Item
|
62
92
|
def initialize(attributes)
|
63
93
|
@attributes = attributes.transform_keys(&:to_sym)
|
@@ -78,67 +108,3 @@ module Perron
|
|
78
108
|
private_constant :Item
|
79
109
|
end
|
80
110
|
end
|
81
|
-
|
82
|
-
# require "csv"
|
83
|
-
|
84
|
-
# module Perron
|
85
|
-
# class Data
|
86
|
-
# include Enumerable
|
87
|
-
|
88
|
-
# def initialize(resource)
|
89
|
-
# @file_path = path_for(resource)
|
90
|
-
# @data = data
|
91
|
-
# end
|
92
|
-
|
93
|
-
# def each(&block)
|
94
|
-
# @data.each(&block)
|
95
|
-
# end
|
96
|
-
|
97
|
-
# private
|
98
|
-
|
99
|
-
# PARSER_METHODS = {
|
100
|
-
# ".csv" => :parse_csv,
|
101
|
-
# ".json" => :parse_json,
|
102
|
-
# ".yaml" => :parse_yaml,
|
103
|
-
# ".yml" => :parse_yaml
|
104
|
-
# }.freeze
|
105
|
-
# SUPPORTED_EXTENSIONS = PARSER_METHODS.keys.freeze
|
106
|
-
|
107
|
-
# def path_for(identifier)
|
108
|
-
# path = Pathname.new(identifier)
|
109
|
-
|
110
|
-
# return path.to_s if path.file? && path.absolute?
|
111
|
-
|
112
|
-
# found_path = SUPPORTED_EXTENSIONS.lazy.map do |extension|
|
113
|
-
# Rails.root.join("app", "content", "data").join("#{identifier}#{extension}")
|
114
|
-
# end.find(&:exist?)
|
115
|
-
|
116
|
-
# found_path&.to_s or raise Errors::FileNotFoundError, "No data file found for '#{identifier}'"
|
117
|
-
# end
|
118
|
-
|
119
|
-
# def data
|
120
|
-
# content = File.read(@file_path)
|
121
|
-
# extension = File.extname(@file_path)
|
122
|
-
# parser = PARSER_METHODS.fetch(extension) do
|
123
|
-
# raise Errors::UnsupportedDataFormatError, "Unsupported data format: #{extension}"
|
124
|
-
# end
|
125
|
-
|
126
|
-
# raw_data = send(parser, content)
|
127
|
-
|
128
|
-
# unless raw_data.is_a?(Array)
|
129
|
-
# raise Errors::DataParseError, "Data in '#{@file_path}' must be an array of objects."
|
130
|
-
# end
|
131
|
-
|
132
|
-
# struct = Struct.new(*raw_data.first.keys, keyword_init: true)
|
133
|
-
# raw_data.map { struct.new(**it) }
|
134
|
-
# rescue Psych::SyntaxError, JSON::ParserError, CSV::MalformedCSVError => error
|
135
|
-
# raise Errors::DataParseError, "Failed to parse '#{@file_path}': #{error.message}"
|
136
|
-
# end
|
137
|
-
|
138
|
-
# def parse_yaml(content) = YAML.safe_load(content, permitted_classes: [Symbol], aliases: true)
|
139
|
-
|
140
|
-
# def parse_json(content) = JSON.parse(content, symbolize_names: true)
|
141
|
-
|
142
|
-
# def parse_csv(content) = CSV.new(content, headers: true, header_converters: :symbol).to_a.map(&:to_h)
|
143
|
-
# end
|
144
|
-
# end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Perron
  class Resource
    # Adds a per-class `configuration` object and a `configure` block helper.
    module Configuration
      extend ActiveSupport::Concern

      class_methods do
        # Lazily builds and memoizes the default configuration for the class.
        def configuration
          return @configuration if @configuration

          defaults = Options.new
          defaults.metadata = Options.new

          defaults.feeds = Options.new
          {rss: "xml", json: "json"}.each do |format, extension|
            feed = ActiveSupport::OrderedOptions.new
            feed.enabled = false
            feed.path = "feeds/#{collection.name.demodulize.parameterize}.#{extension}"
            feed.max_items = 20

            defaults.feeds[format] = feed
          end

          defaults.linked_data = ActiveSupport::OrderedOptions.new

          related_posts = ActiveSupport::OrderedOptions.new
          related_posts.enabled = false
          related_posts.max = 5
          defaults.related_posts = related_posts

          sitemap = ActiveSupport::OrderedOptions.new
          sitemap.exclude = false
          defaults.sitemap = sitemap

          @configuration = defaults
        end

        # Yields the configuration so callers can adjust it in a block.
        def configure
          yield(configuration)
        end
      end

      # Ordered options where assigning a Hash to a key that already holds
      # nested options merges into them instead of replacing them.
      class Options < ActiveSupport::OrderedOptions
        def method_missing(name, *arguments)
          setter = name.to_s

          if setter.end_with?("=")
            current = self[setter.chomp("=").to_sym]
            incoming = arguments.first

            return current.merge!(incoming) if current.is_a?(ActiveSupport::OrderedOptions) && incoming.is_a?(Hash)
          end

          super
        end

        def respond_to_missing?(name, include_private = false) = super
      end
      private_constant :Options
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Perron
  module Site
    class Resource
      class Related
        # English stop words ignored when comparing resource contents.
        module StopWords
          # Built once and frozen: `all` is consulted for every token of every
          # resource, so allocating a fresh Set per call is wasteful.
          WORDS = Set.new(%w[
            a about above after again against all am
            an and any are as at be because been
            before being below between both but by can
            did do does doing down during each few
            for from further had has have having he
            her here hers herself him himself his how
            i if in into is it its itself just
            me more most my myself no nor not now
            of off on once only or other our ours
            ourselves out over own s same she should
            so some such t than that the their
            theirs them themselves then there these they
            this those through to too under until up
            very was we were what when where which
            while who whom why will with you your
            yours yourself yourselves
          ]).freeze

          module_function

          # @return [Set<String>] the shared, frozen set of stop words
          def all = WORDS
        end
      end
    end
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "perron/site/resource/related/stop_words"
|
4
|
+
|
5
|
+
module Perron
  module Site
    class Resource
      # Ranks the other resources of a resource's collection by the cosine
      # similarity of their TF-IDF weighted content.
      class Related
        # @param resource [#collection, #slug, #content] resource to find
        #   related resources for
        def initialize(resource)
          @resource = resource
          @collection = resource.collection
        end

        # @param limit [Integer] maximum number of resources to return
        # @return [Array] most similar resources first; resources with equal
        #   scores keep their order from the collection, so results are
        #   reproducible between builds
        def find(limit: 5)
          candidates = @collection.resources.reject { |candidate| candidate.slug == @resource.slug }

          candidates
            .each_with_index
            .sort_by { |candidate, index| [-cosine_similarities_for(@resource, candidate), index] }
            .first(limit)
            .map(&:first)
        end

        private

        # Cosine similarity of the two resources' TF-IDF vectors, or 0.0 when
        # either has no usable tokens.
        def cosine_similarities_for(resource_one, resource_two)
          first_vector = tfidf_vector_for(resource_one)
          second_vector = tfidf_vector_for(resource_two)

          return 0.0 if first_vector.empty? || second_vector.empty?

          dot_product = first_vector.sum(0.0) { |term, weight| weight * second_vector.fetch(term, 0.0) }
          denominator = magnitude_of(first_vector) * magnitude_of(second_vector)

          denominator.zero? ? 0.0 : dot_product / denominator
        end

        # Euclidean length of a term => weight vector.
        def magnitude_of(vector)
          Math.sqrt(vector.each_value.sum(0.0) { |weight| weight**2 })
        end

        # Term => (term frequency * inverse document frequency) for the
        # resource, memoized per resource object.
        def tfidf_vector_for(target_resource)
          @tfidf_vectors ||= {}
          @tfidf_vectors[target_resource] ||= begin
            tokens = tokenize_content(target_resource)
            token_count = tokens.size.to_f

            tokens.tally.to_h { |term, count| [term, (count / token_count) * inverse_document_frequency[term]] }
          end
        end

        # Lower-cased words of the content with HTML tags, stop words and
        # words shorter than three characters removed. Memoized per resource.
        def tokenize_content(target_resource)
          @tokenized_content ||= {}
          @tokenized_content[target_resource] ||= begin
            stop_words = StopWords.all # fetched once, not once per token
            text = target_resource.content.to_s.gsub(/<[^>]*>/, " ")

            text.downcase.scan(/\w+/).reject { |token| token.length < 3 || stop_words.include?(token) }
          end
        end

        # Term => smoothed inverse document frequency, `ln((1 + N) / (1 + df)) + 1`.
        #
        # The unsmoothed `ln(N / (1 + df))` is zero or negative for terms that
        # occur in N - 1 or N resources, which zeroes out shared vocabulary in
        # small collections. Terms that occur in no collection resource (the
        # target itself may not be part of `collection.resources`) get df = 0
        # instead of raising on a missing entry.
        def inverse_document_frequency
          @inverse_document_frequency ||= begin
            resources = @collection.resources
            document_frequency = Hash.new(0)

            resources.each do |resource|
              tokenize_content(resource).uniq.each { |term| document_frequency[term] += 1 }
            end

            total_resources = resources.size

            Hash.new do |frequencies, term|
              frequencies[term] = Math.log((1.0 + total_resources) / (1 + document_frequency[term])) + 1.0
            end
          end
        end
      end
    end
  end
end
|
data/lib/perron/site/resource.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "perron/site/resource/configuration"
|
3
4
|
require "perron/site/resource/core"
|
4
5
|
require "perron/site/resource/class_methods"
|
5
6
|
require "perron/site/resource/publishable"
|
7
|
+
require "perron/site/resource/related"
|
6
8
|
require "perron/site/resource/slug"
|
7
9
|
require "perron/site/resource/separator"
|
8
10
|
|
@@ -10,6 +12,7 @@ module Perron
|
|
10
12
|
class Resource
|
11
13
|
ID_LENGTH = 8
|
12
14
|
|
15
|
+
include Perron::Resource::Configuration
|
13
16
|
include Perron::Resource::Core
|
14
17
|
include Perron::Resource::ClassMethods
|
15
18
|
include Perron::Resource::Publishable
|
@@ -47,6 +50,9 @@ module Perron
|
|
47
50
|
|
48
51
|
def collection = Collection.new(self.class.model_name.collection)
|
49
52
|
|
53
|
+
def related_resources(limit: 5) = Perron::Site::Resource::Related.new(self).find(limit:)
|
54
|
+
alias_method :related, :related_resources
|
55
|
+
|
50
56
|
private
|
51
57
|
|
52
58
|
def processable?
|
@@ -55,7 +61,7 @@ module Perron
|
|
55
61
|
|
56
62
|
def generate_id
|
57
63
|
Digest::SHA1.hexdigest(
|
58
|
-
@file_path.delete_prefix(Perron.configuration.input).parameterize
|
64
|
+
@file_path.delete_prefix(Perron.configuration.input.to_s).parameterize
|
59
65
|
).first(ID_LENGTH)
|
60
66
|
end
|
61
67
|
end
|
data/lib/perron/site.rb
CHANGED
@@ -4,6 +4,7 @@ require "perron/site/builder"
|
|
4
4
|
require "perron/site/collection"
|
5
5
|
require "perron/site/resource"
|
6
6
|
require "perron/site/data"
|
7
|
+
require "perron/site/data/proxy"
|
7
8
|
|
8
9
|
module Perron
|
9
10
|
module Site
|
@@ -24,13 +25,14 @@ module Perron
|
|
24
25
|
def collections
|
25
26
|
@collections ||= Dir.children(Perron.configuration.input)
|
26
27
|
.select { File.directory?(File.join(Perron.configuration.input, it)) }
|
28
|
+
.reject { it == "data" }
|
27
29
|
.map { Collection.new(it) }
|
28
30
|
end
|
29
31
|
|
30
32
|
def collection(name) = Collection.new(name)
|
31
33
|
|
32
|
-
def data(name)
|
33
|
-
Perron::Data.new(name)
|
34
|
+
def data(name = nil)
|
35
|
+
(name && Perron::Data.new(name)) || Perron::Data::Proxy.new
|
34
36
|
end
|
35
37
|
end
|
36
38
|
end
|
data/lib/perron/version.rb
CHANGED