nhkore 0.3.19 → 0.3.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a63aff9e86de6a678a4b4ad3defa9bf7d28577dabad0ef0a69747ddce1224219
4
- data.tar.gz: 12668a0e95694198c928644d51be450c485c0947f5741dd01ab2ff8e20b0e760
3
+ metadata.gz: 6d6942febe79f05d1cbd53e40f9048671ee0462d16e22945a6b7bdb3bb5bb2ae
4
+ data.tar.gz: 226acf4e93e6b95b475a1d42bc7a0d2dff82d5647613164fc2feba882e24b6c7
5
5
  SHA512:
6
- metadata.gz: 4c7981a84189176febf8156251b850e604ef1c447e1c78d0a8af5a4a509b68c0cf564b54e7cfac4582d1c572a50da0f05f9e268b19e4ac9d170ae0806688f68d
7
- data.tar.gz: fd540fd9952b5377fcf4db72e4a5e71724d0a8d20ae256ecc4bb8293c672cd0fea1284e2639444fe8fbd892859fbac85ebc66fbabe96c51fdd60322e96f8419a
6
+ metadata.gz: 40d4b5513b9a3d22e4969ef7d88c477d4c1dc118165660e751c8c9d742305d74470494597e5e12f61e15af9dc4c3f4e0bccdd82ac7e8488cb2a312a9749ada20
7
+ data.tar.gz: 00307ecff6363a7f0b80595fc7ec14a01f3e9e303d5d3023e084883a075bed12464a388ca16b6ff6ca143e993bdb5834493d361084452dd61846567173f0ef7f
data/CHANGELOG.md CHANGED
@@ -1,12 +1,18 @@
1
1
  # Changelog | NHKore
2
2
 
3
- All notable changes to this project will be documented in this file.
3
+ - [Keep a Changelog v1.0.0](https://keepachangelog.com/en/1.0.0)
4
+ - [Semantic Versioning v2.0.0](https://semver.org/spec/v2.0.0.html)
4
5
 
5
- Format is based on [Keep a Changelog v1.0.0](https://keepachangelog.com/en/1.0.0),
6
- and this project adheres to [Semantic Versioning v2.0.0](https://semver.org/spec/v2.0.0.html).
6
+ ## [Unreleased]
7
+ - https://github.com/esotericpig/nhkore/compare/v0.3.22...v0.3
7
8
 
8
- ## [[Unreleased]](https://github.com/esotericpig/nhkore/compare/v0.3.19...HEAD)
9
- -
9
+
10
+ ## [v0.3.22] - 2025-04-30
11
+
12
+ ### Changed
13
+ - Put v0.3 in its own branch to prepare for v0.4, which will heavily change.
14
+ - Changed v0.3 links to use v0.3 branch.
15
+ - Reverted the removing of `UserAgents` for v0.3 only.
10
16
 
11
17
 
12
18
  ## [v0.3.19] - 2025-04-28
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- nhkore (0.3.19)
4
+ nhkore (0.3.22)
5
5
  attr_bool (~> 0.2)
6
6
  bimyou_segmenter (~> 1.2)
7
7
  cri (~> 2.15)
@@ -14,7 +14,6 @@ PATH
14
14
  psychgus (~> 1.3)
15
15
  public_suffix (~> 6.0)
16
16
  rainbow (~> 3.1)
17
- ronin-web-user_agents (~> 0.1)
18
17
  rss (~> 0.3)
19
18
  rubyzip (~> 2.4)
20
19
  tiny_segmenter (~> 0.0)
@@ -50,7 +49,7 @@ GEM
50
49
  stringio
51
50
  psychgus (1.3.5)
52
51
  psych (>= 3.0)
53
- public_suffix (6.0.1)
52
+ public_suffix (6.0.2)
54
53
  racc (1.8.1)
55
54
  rainbow (3.1.1)
56
55
  rake (13.2.1)
@@ -62,7 +61,6 @@ GEM
62
61
  reline (0.6.1)
63
62
  io-console (~> 0.5)
64
63
  rexml (3.4.1)
65
- ronin-web-user_agents (0.1.1)
66
64
  rss (0.3.1)
67
65
  rexml
68
66
  rubyzip (2.4.1)
data/README.md CHANGED
@@ -732,7 +732,7 @@ if !File.exist?(file)
732
732
  end
733
733
  ```
734
734
 
735
- ### Util & DatetimeParser
735
+ ### Util, UserAgents, & DatetimeParser
736
736
 
737
737
  These provide a variety of useful methods/constants.
738
738
 
@@ -740,6 +740,7 @@ Here are some of the most useful ones:
740
740
 
741
741
  ```Ruby
742
742
  require 'nhkore/datetime_parser'
743
+ require 'nhkore/user_agents'
743
744
  require 'nhkore/util'
744
745
 
745
746
  include NHKore
@@ -747,6 +748,10 @@ include NHKore
747
748
  puts '======='
748
749
  puts '[ Net ]'
749
750
  puts '======='
751
+ # Get a random User Agent for HTTP header field 'User-Agent'.
752
+ # - This is used by default in Scraper/SearchScraper.
753
+ puts "User-Agent: #{UserAgents.sample()}"
754
+
750
755
  uri = URI('https://www.bing.com/search?q=nhk')
751
756
  Util.replace_uri_query!(uri,q: 'banana')
752
757
 
@@ -9,6 +9,7 @@
9
9
  #++
10
10
 
11
11
  require 'nhkore/util'
12
+ require 'nhkore/version'
12
13
 
13
14
  module NHKore
14
15
  module CLI
@@ -16,7 +17,8 @@ module CLI
16
17
  DEFAULT_GET_CHUNK_SIZE = 4 * 1024
17
18
  DEFAULT_GET_URL_LENGTH = 11_000_000 # Just a generous estimation used as a fallback; may be outdated.
18
19
  GET_URL_FILENAME = 'nhkore-core.zip'
19
- GET_URL = "https://github.com/esotericpig/nhkore/releases/latest/download/#{GET_URL_FILENAME}".freeze
20
+ GET_URL = "https://github.com/esotericpig/nhkore/releases/download/v#{NHKore::VERSION}" \
21
+ "/#{GET_URL_FILENAME}".freeze
20
22
 
21
23
  def build_get_cmd
22
24
  app = self
@@ -36,9 +38,7 @@ module CLI
36
38
  DESC
37
39
 
38
40
  option :o,:out,'directory to save downloaded files to',argument: :required,default: Util::CORE_DIR,
39
- transform: lambda { |value|
40
- app.check_empty_opt(:out,value)
41
- }
41
+ transform: ->(value) { app.check_empty_opt(:out,value) }
42
42
  flag nil,:'show-url','show download URL and exit (for downloading manually)' do |_value,_cmd|
43
43
  puts GET_URL
44
44
  exit
@@ -69,7 +69,7 @@ module CLI
69
69
  out_dir = @cmd_opts[:out]
70
70
 
71
71
  begin
72
- start_spin('Opening URL')
72
+ start_spin("Opening URL: #{GET_URL} ")
73
73
 
74
74
  begin
75
75
  down = Down::NetHttp.open(GET_URL,rewindable: false,**@scraper_kargs)
data/lib/nhkore/lib.rb CHANGED
@@ -31,6 +31,7 @@ require 'nhkore/search_link'
31
31
  require 'nhkore/search_scraper'
32
32
  require 'nhkore/sifter'
33
33
  require 'nhkore/splitter'
34
+ require 'nhkore/user_agents'
34
35
  require 'nhkore/util'
35
36
  require 'nhkore/variator'
36
37
  require 'nhkore/version'
@@ -11,9 +11,9 @@
11
11
  require 'attr_bool'
12
12
  require 'nokogiri'
13
13
  require 'open-uri'
14
- require 'ronin/web/user_agents'
15
14
 
16
15
  require 'nhkore/error'
16
+ require 'nhkore/user_agents'
17
17
  require 'nhkore/util'
18
18
 
19
19
  module NHKore
@@ -23,7 +23,7 @@ module NHKore
23
23
  DEFAULT_HEADER = {
24
24
  # See for better ones:
25
25
  # - https://www.useragentstring.com/pages/Chrome/
26
- 'user-agent' => Ronin::Web::UserAgents.random,
26
+ 'user-agent' => UserAgents.sample,
27
27
 
28
28
  'accept' => 'text/html,application/xhtml+xml,application/xml,application/rss+xml,text/xml;' \
29
29
  'q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
@@ -111,14 +111,14 @@ module NHKore
111
111
  super(url,**kargs)
112
112
  end
113
113
 
114
- # FIXME: Bing no longer allows `count`.
115
- # rubocop:disable Lint/UnusedMethodArgument
116
114
  def self.build_url(site,count: DEFAULT_RESULT_COUNT,**_kargs)
117
115
  url = ''.dup
118
116
 
119
117
  url << 'https://www.bing.com/search?'
120
118
  url << URI.encode_www_form(
121
119
  q: "site:#{site}",
120
+ count: count,
121
+
122
122
  qs: 'n',
123
123
  sp: '-1',
124
124
  lq: '0',
@@ -131,15 +131,13 @@ module NHKore
131
131
 
132
132
  return url
133
133
  end
134
- # rubocop:enable Lint/UnusedMethodArgument
135
134
 
136
135
  def scrape(slinks,page = NextPage.new())
137
- next_page,_link_count = scrape_html(slinks,page)
136
+ next_page,link_count = scrape_html(slinks,page)
138
137
 
139
- # FIXME: Bing no longer allows RSS pages after the first page.
140
- # if link_count <= 0
141
- # scrape_rss(slinks,page,next_page)
142
- # end
138
+ if link_count <= 0
139
+ scrape_rss(slinks,page,next_page)
140
+ end
143
141
 
144
142
  return next_page
145
143
  end
data/lib/nhkore/sifter.rb CHANGED
@@ -322,7 +322,7 @@ module NHKore
322
322
  end
323
323
 
324
324
  def sift
325
- master_article = Article.new
325
+ result = Article.new
326
326
 
327
327
  @articles.each do |article|
328
328
  next if filter?(article)
@@ -333,11 +333,11 @@ module NHKore
333
333
  next if word.freq <= 1
334
334
  next if word.word =~ /\p{Latin}|[[:digit:]]/
335
335
 
336
- master_article.add_word(word,use_freq: true)
336
+ result.add_word(word,use_freq: true)
337
337
  end
338
338
  end
339
339
 
340
- words = master_article.words.values
340
+ words = result.words.values
341
341
 
342
342
  words.sort! do |word1,word2|
343
343
  # Order by freq DESC (most frequent words to top).