web_stat 0.3.2 → 0.3.7

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eff1d2b80ff6bf4b61f82a100319a6b37ee13cb8a2ad96a11fd336c8a7164398
4
- data.tar.gz: f4bf36491dbe6ae32b1cbf718ceb1fe7e91adce60848f4296d6c1319ea5c4eea
3
+ metadata.gz: 5304507f8ce01a6a98717756927366005db0eceb89b3d22f29e4283c6eedb4d7
4
+ data.tar.gz: f561221644f98831867c2d853201a0312dcb75674ca4ae247b319b4f2a1ba085
5
5
  SHA512:
6
- metadata.gz: f38fe9d4f0ad495107c5ac42d2d240b42e810ae4f7ad0db5738b705f7e03b24846875cbc2139a334ebfc7d33ff1df953dec768f69f13a0932cd01a2c6f221753
7
- data.tar.gz: 5f30259af5a84e2eb43645766aac49d151367a82c9e5643a0ddf3a5f15dbcbba59884def134cfaa19eaca1a932e77c28d65bfd389cecdd0262812cad808ff5e3
6
+ metadata.gz: bec01871973e80dc46b9246a264791213b3bdd4cb20693fc0d51cc28c2d9bba28b40af65322dc2b0c950e9bb5aedd288ff8fd771adfbd5b2aee1564b98888cb0
7
+ data.tar.gz: 5cfce031ad779f347fae362ccff2deadb3711e4f9fcc06cafcd232b86e8c18aff63e40c430a330cfdf00d398aa5257457c5dc9f3bee492478359d3801e217f08
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_stat (0.3.2)
4
+ web_stat (0.3.6)
5
5
  bundler (>= 2.0.2)
6
6
  cld (>= 0.8.0)
7
7
  mechanize (>= 2.7)
@@ -1,15 +1,21 @@
1
- # Minimum number of characters to detect meta title
2
- min_length_of_meta_title: 10
3
- # Split regular expression for titles
4
- regex_to_sprit_title: '\||-|:|||:|〜|\~| '
5
- # User Agent
6
- user_agent: "web_stat gem agent"
7
- # Eyecatch image xpaths
8
- eyecatch_image_xpaths:
9
- - '/html/head/meta[@property="twitter:image"]/@content'
10
- - '/html/head/meta[@property="og:image"]/@content'
11
- - '//img[@class="attachment-post-thumbnail"]/@src'
12
- - '//div[@id="content"]//img/@src'
13
- - '//img/@src'
14
- userdic: ""
15
- use_chromedirver: true
1
+ development: &development
2
+ # Minimum number of characters to detect meta title
3
+ min_length_of_meta_title: 10
4
+ # Split regular expression for titles
5
+ regex_to_sprit_title: '\||-|:|||:|〜|\~| – '
6
+ # User Agent
7
+ user_agent: "web_stat gem agent"
8
+ # Eyecatch image xpaths
9
+ eyecatch_image_xpaths:
10
+ - '/html/head/meta[@property="twitter:image"]/@content'
11
+ - '/html/head/meta[@property="og:image"]/@content'
12
+ - '//img[@class="attachment-post-thumbnail"]/@src'
13
+ - '//div[@id="content"]//img/@src'
14
+ - '//img/@src'
15
+ userdic: ""
16
+ use_chromedirver: false
17
+ test:
18
+ <<: *development
19
+ production:
20
+ <<: *development
21
+ use_chromedirver: true
@@ -3,31 +3,37 @@ module WebStat
3
3
  class Configure
4
4
  DEFAULT_CONFIG_FILE_PATH = 'config/web_stat.yml'
5
5
 
6
- # Get yaml
7
- def self.get
8
- YAML.load_file(self.get_configure_path)
9
- end
10
-
11
- # Get configure path
12
- def self.get_configure_path
13
- if File.exists?(self.get_custom_configure_path)
14
- self.get_custom_configure_path
15
- else
16
- self.get_default_configure_path
6
+ class << self
7
+ # Get yaml
8
+ def get
9
+ if defined? Rails
10
+ YAML.load_file(get_configure_path)[Rails.env]
11
+ else
12
+ YAML.load_file(get_configure_path)["production"]
13
+ end
17
14
  end
18
- end
19
-
20
- # Get default configure path
21
- def self.get_default_configure_path
22
- File.join(File.expand_path("../", __FILE__), DEFAULT_CONFIG_FILE_PATH)
23
- end
24
-
25
- # Get custom configure path
26
- def self.get_custom_configure_path
27
- if defined? Rails
28
- File.join(Rails.root, DEFAULT_CONFIG_FILE_PATH)
29
- else
30
- File.join(Bundler.root, DEFAULT_CONFIG_FILE_PATH)
15
+
16
+ # Get configure path
17
+ def get_configure_path
18
+ if File.exists?(get_custom_configure_path)
19
+ get_custom_configure_path
20
+ else
21
+ get_default_configure_path
22
+ end
23
+ end
24
+
25
+ # Get default configure path
26
+ def get_default_configure_path
27
+ File.join(File.expand_path("../", __FILE__), DEFAULT_CONFIG_FILE_PATH)
28
+ end
29
+
30
+ # Get custom configure path
31
+ def get_custom_configure_path
32
+ if defined? Rails
33
+ File.join(Rails.root, DEFAULT_CONFIG_FILE_PATH)
34
+ else
35
+ File.join(Bundler.root, DEFAULT_CONFIG_FILE_PATH)
36
+ end
31
37
  end
32
38
  end
33
39
  end
@@ -47,6 +47,9 @@ module WebStat
47
47
  break
48
48
  end
49
49
  end
50
+ if path.nil? || path.empty?
51
+ path = @nokogiri.at('body').xpath('//img').first.attr('src')
52
+ end
50
53
  if ! path.nil? && path.match(/^\//)
51
54
  "#{URI.parse(@url).scheme}://#{URI.parse(@url).host}#{path}"
52
55
  else
@@ -57,7 +60,7 @@ module WebStat
57
60
  # Get local path to save url
58
61
  # @param [String] url
59
62
  def save_local_path(url)
60
- return nil if url.nil?
63
+ return nil if url.nil? || ! url.match(%{^http})
61
64
  tmp_file = "/tmp/#{Digest::SHA1.hexdigest(url)}"
62
65
  agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
63
66
  image = agent.get(url)
@@ -96,7 +99,7 @@ module WebStat
96
99
  # Get the informations of @url
97
100
  # @param [Hash] Specify a dictionary for each language code. example ) {"ja": /***/**.dic, "other": /***/***.dic}
98
101
  def stat(userdics: nil)
99
- clean_content = content.scrub('').gsub(/[\n\t\r ]/, "").gsub(/\s{2,}/, "\s")
102
+ clean_content = content.scrub('').gsub(/[\n\t\r ]/, "").gsub(/\s{2,}/, "\s").gsub(URI.regexp, "")
100
103
  language_code = CLD.detect_language(clean_content)[:code]
101
104
  if userdics && userdics.has_key?(language_code) && File.exists?(userdics[language_code])
102
105
  tag = WebStat::Tag.new("#{title} #{content}", userdic: userdics[language_code])
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.3.2"
2
+ VERSION = "0.3.7"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-13 00:00:00.000000000 Z
11
+ date: 2020-06-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler