dxtitle_search 0.2.0 → 0.3.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: febca2af1d5f671487bb3a374e0ce3a3a1c9d1bc61a365a7e264da32f2865d44
4
- data.tar.gz: 8fb8561355abb9b169b789fa9388e6c70f12c92521d4dea14594ab74e8ad69d9
3
+ metadata.gz: e227dc645595c967bf03dd213ea6846c7c9c438a26ee7db3fd059f464493f370
4
+ data.tar.gz: eccff27f864a56f00f0952d8ddd47a70eaeb2fca6eca24d961cf4c91d54f7387
5
5
  SHA512:
6
- metadata.gz: c6705c471563eede40e9dd71aa133dc3e5adf9a2f014864c44312aab6a3235bd335648b2dc9a40f6d3d1a339ea50258cfe2c31f56d61f617f67dba5be4237379
7
- data.tar.gz: 9268dc3c81722d2e42824a3e6ac175e7c12e4927254fcdb7f95825f3797b060e2c912ba62af4517841c4f076cd3379ee4be48c44e866796037fb7cb27ad2520c
6
+ metadata.gz: db771d09b581573be92de6c65b79c3f635dbcb6187e0bc1d6a00c5d4042a3f13f4838696588d65cc9856be56ed5666047221204e5910fbe38e8aec42f075ee70
7
+ data.tar.gz: f39cbbb2ca067cfb4c307787b94dab70c7c0d34255037e24e9228b32f284ebbe6361523456d078659aa11e5e3e694ef6dfae3890b18b87ce90d2a707d618a14d
checksums.yaml.gz.sig CHANGED
Binary file
data.tar.gz.sig CHANGED
Binary file
@@ -2,96 +2,95 @@
2
2
 
3
3
  # file: dxtitle_search.rb
4
4
 
5
- require 'dynarex'
5
+ require 'indexer101'
6
6
 
7
7
 
8
8
  class DxTitleSearch
9
9
 
10
- def initialize(obj=nil, sources: nil, debug: false)
10
+ def initialize(obj=nil, sources: nil, level: 1, debug: false)
11
11
 
12
12
  @debug = debug
13
+ @indexer = Indexer101.new debug: debug
14
+ @level = level
13
15
 
14
16
  s = if sources then
15
17
 
16
- dx = Dynarex.new(sources)
17
- dx.all.map {|x| read x.uri }.join
18
+ dx = Dynarex.new(sources)
19
+ puts 'before scan_dxindex' if @debug
20
+ a = dx.all.map(&:uri)
21
+ puts 'a: ' + a.inspect if @debug
22
+ @indexer.scan_dxindex a, level: level
18
23
 
19
- elsif obj then
20
-
21
- # is it a Dynarex file location?
22
- if obj.lines.length < 2 then
23
-
24
- read obj
24
+ elsif obj and (obj.is_a?(DxLite) or obj.is_a?(Dynarex)) or obj.lines.length < 2
25
25
 
26
- else
26
+ @indexer.scan_dxindex obj, level: level
27
27
 
28
- obj
29
-
30
- end
31
28
  end
32
29
 
33
- @h = h = s.lines.inject({}) do |r,x|
34
- key, value = x.split(/\s+(?=[^\s]+$)/,2)
35
- r.merge(key.rstrip => value)
36
- end
37
-
38
- @a = h.keys
30
+ #jr230620 @indexer.build
39
31
 
40
32
  end
41
33
 
42
- def search(keywords)
43
-
44
- phrases = @a.grep /#{keywords}/i
34
+ def search(keywords, minchars: 3)
45
35
 
46
- # find out the keywords count for each entry found
47
- a0 = keywords.split.flat_map do |x|
48
- next if @a.length < 2
49
- @a.grep /#{x}/i
50
- end
51
-
52
- a = a0.uniq.map do |entry|
53
- [entry, entry.scan(/#{keywords.split.join('|')}/).uniq.count]
54
- end
36
+ a2 = @indexer.search keywords.split(/[\s:"!\?\(\)£]+(?=[\w#_'-]+)/), \
37
+ minchars: minchars
38
+ # format each result as a Hash object
39
+ a3 = a2.map do |date, title, url|
55
40
 
56
- # sort by keywords found per entry and then date
57
- #a2 = (phrases + a).uniq.sort do |x, x2|
58
- a2 = a.sort do |x, x2|
59
- -([x.last, x.first[/^\d+/], ] <=> [x2.last, x2.first[/^\d+/]])
60
- end
41
+ {title: title, url: url, date: date}
61
42
 
62
- # format each result as a Hash object
63
- a3 = (phrases + a2).map do |x|
43
+ end
64
44
 
65
- if x.length > 1 then
66
- line, _ = x
67
- else
68
- line = x
69
- end
45
+ puts 'a3: ' + a3.inspect if @debug
46
+
47
+ @dx = Dynarex.new('results/result(title, url, date)').import(a3)
48
+
49
+ def a3.to_dx()
50
+ Dynarex.new('results/result(title, url, date)').import(self)
51
+ end
52
+
53
+ def a3.to_tags()
54
+ a = self.map {|x| x[:title].scan(/(?<=#)(\w+)/)}.flatten
55
+ a.uniq.sort.map {|x| [x, a.count(x)]}
56
+ end
57
+
58
+ def a3.search(keywords)
70
59
 
71
- puts 'line: ' + line.inspect if @debug
72
-
73
- rawtime, title = line.split(/ +/,2)
74
- puts 'title: ' + title.inspect if @debug
75
-
76
- {title: title, url: @h[line], date: Time.at(rawtime.to_i)}
60
+ dx = Dynarex.new('results/result(title, url, date)').import(self)
77
61
 
62
+ level = keywords[0] == '#' ? 0 : 1
63
+ dts = DxTitleSearch.new dx, level: level
64
+ dts.search keywords
65
+
78
66
  end
67
+
68
+ def a3.tag_search(keywords)
69
+
70
+ dx = Dynarex.new('results/result(title, url, date)').import(self)
79
71
 
80
- puts 'a3: ' + a3.inspect if @debug
72
+ level = keywords[0] == '#' ? 0 : 1
73
+ dts = DxTitleSearch.new dx, level: level
74
+ dts.tag_search keywords
75
+
76
+ end
81
77
 
82
78
  return a3
83
79
 
84
80
  end
85
81
 
86
- private
87
-
88
- def read(source)
89
-
90
- dx = Dynarex.new(source)
82
+ def tag_search(keywords)
83
+ r = @indexer.lookup *keywords.split(/[\W]+(?=[\w]+)/).map {|x| "#" + x}
84
+ r.map {|x| x.to_s[1..-1]}
85
+ end
91
86
 
92
- dx.all.map do |x|
93
- "%d %s %s" % [Time.parse(x.created).to_i, x.title, x.url]
94
- end.join("\n")
87
+ def to_tags()
88
+
89
+ a = @indexer.index.map do |key, value|
90
+ [key.to_s[1..-1], value.length]
91
+ end
92
+
93
+ a.sort_by(&:first)
95
94
 
96
95
  end
97
96
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dxtitle_search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -11,54 +11,54 @@ cert_chain:
11
11
  - |
12
12
  -----BEGIN CERTIFICATE-----
13
13
  MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
- YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMTkwNzIzMjE0NjM1WhcN
15
- MjAwNzIyMjE0NjM1WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
- cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQC4fglD
17
- MJeEttUrAcVhZbHOvDxmej9AsrgXLSOdp7pVoUXQk1tEVjKH+1ibOhxxhx5Eiptk
18
- UGqqb/X67jXasjLr1wtY2dw/LKLAIoX5iPqE4vI0I0L5tGSF9i60f1KdgfIpgnx+
19
- fVNq32btdyoFuJBNG7yDBFjJ21ao5kyO9xpSUHnUDrrwyrdTZc9v9rG0kUEKzK/w
20
- 9obVvTWBmfIjBt3Vf5O2f88S+6boBJu6fDn0smnJEScHwTgj65yLN+ejPAanlqqt
21
- sxxxjLkdYl3LVDRUKXL09XcENdDFKwyiZu7kKJiVq7kvUcwMOBFobp2iWY9Yiam9
22
- HeRjOr8W35bZ3xScPLIVU08BYscUxoOHsMRpNPUY5I8SIYCLPJ7YvBO4ZlxJT5xQ
23
- E7M8aG/uKxwHKgHJu/3siL2cU/YtkVNS9yTxHdzD++90q3sC8uOnBLUMDTsKpV4l
24
- +jHMccyHbQXoazVQoVu7xNup2BpbiHALacarCLvu9GGJLrXkcX40ujB+TMcCAwEA
25
- AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUTUrPUS7w
26
- ILFIUp8s90zt8ljXtpkwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjEwMjIyMDAxMTU0WhcN
15
+ MjIwMjIyMDAxMTU0WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDxwG7T
17
+ /9FLpGJct6ptTTRa2UnYj9qeRp2KmjnMpONFP7Owl1yjh2auzyIfDChG7G2W9kZ4
18
+ eOPno39R2GJOxt5IVTxmjgBpAUl8xW3n629b2jE7al1GJZ89Jm3Fqs1rBeUaC0wB
19
+ DIX4SVwHqfzxvZTVLVZ5LPIKqxNiJe8eUIRRzzc0/XzyFTh3Sg4SH1ncm04vvxWg
20
+ piQyL6iUeUuirHjotchLINWzi2A4PgO63/YYM3tYPzbykkiBrE/iJHnPuFEjNfIj
21
+ +tVTvPB0wXdCDNf6yHr9cfy0t97eVGTbwv+UhWX9Vmz6/t4bi9X2xhmdp32y9CK6
22
+ s/I16J3oC70JvsTKOhDAM3OMQFjCpBbtyFYuJDbDbQKD/elGXTHvAmCi5skCNJzE
23
+ oq1l5aVpp3IFrZQfLEimeDDEq8ulEajlllpitq1vD+vycRSydybtAhMs/kK7DY7D
24
+ kVGOI1HDz5p+A6e8wyXoecdaQeswH3gd3nkCKfV7ohKXXywW8JWMEBgYj4ECAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU1qWuKrKD
26
+ A90s8y+KBolbTgtLOP8wJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
27
  c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
- BgkqhkiG9w0BAQsFAAOCAYEAaEl3CKusWW3/wUutwDz0w0ojYuc48LOVrmGpZ8uH
29
- fi2wJx5Jt41/qxGWCz0qo/BFfFZjNllblywzhjIw80HnoGAR5bDaKF0fTdYsh++a
30
- Vyx6BkFVM5hYXIJklBT/PLR7JW/rOrFgPNrXM+BlWlFBGIQT8KaF7SfOYrDdqHG8
31
- uLlGPgLkR0U3tLfyWwF6548QRVz2Lm53dWeT0JGs974Dj+n1b8P3N0Uv1AFpetNB
32
- UTelmW2QjiLgxJxkFkRjp/Ep0s/puw3FbtaviQDBBRr9I/bMAE5QOtKa3k1e7m70
33
- 39utqhm1XpZeJamPbeuSFl6M3RRiwWACVlz9QjWVgA5JSf1GwCsc0nsa76pq7Yfj
34
- DtQaJilztwDXwMGWAJ5qDOiGcuv3Wmec+Nnr/9xoZRA9V/9GqXlWjqSSLgFHk/r1
35
- ODgOmcgAsTin0T9l0vWQJnj2OClh1pcC6tFnQXXcYuwTpbtM2v4OmUfUDvJmwQ42
36
- POydZUYZg49X9TlTDqPMKed0
28
+ BgkqhkiG9w0BAQsFAAOCAYEAHJI3akrM/ne2v1zYcZOy0POu/LLfcBM93BuQcfiN
29
+ ok256P7VWXSjU2TaR8/vLDK0dbZo5V08WCKk08yuqOdzOII7JSVh0QHzzgFTR7yS
30
+ RG3xDZtp6j4MXzoAkLd06rIL1O21GXGc49Wkc7XHeFytbRF8XhT29e4H4s5KMoJ1
31
+ l9Ks2oMc4USzKo7loCU+1q0Sg8BQEpS2+9RNTpG7MKDdneWXhb2o6UTQ3WIDpfEt
32
+ ImShbAmoVHcdMa3HZUH0Pa+pzr/KrM0sg2fa2trlJS9oDq9pZl/cQO6Ryl76lqKg
33
+ bQ7jxCYFron1UH6iS3bJBPfFmIWcio3zT8VmeqVPhXatDDnb00JijkO/WThobMxb
34
+ aZJQ2A0sZWHF9u5v9gda8mq21CID3YMPRzQ1u5o4jjyNhcVyjW29GvfD8+8D4s46
35
+ g10O6gIBUztpsNHwbBTi9fHgtis4E5dAPAo4uzo4MCKVwi72iZQ3E92ACYb9zlxA
36
+ g6CsTY0vUjhgfh9taSRj3qxd
37
37
  -----END CERTIFICATE-----
38
- date: 2019-08-07 00:00:00.000000000 Z
38
+ date: 2021-02-22 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
- name: dynarex
41
+ name: indexer101
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '1.8'
46
+ version: '0.2'
47
47
  - - ">="
48
48
  - !ruby/object:Gem::Version
49
- version: 1.8.19
49
+ version: 0.2.5
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - - "~>"
55
55
  - !ruby/object:Gem::Version
56
- version: '1.8'
56
+ version: '0.2'
57
57
  - - ">="
58
58
  - !ruby/object:Gem::Version
59
- version: 1.8.19
59
+ version: 0.2.5
60
60
  description:
61
- email: james@jamesrobertson.eu
61
+ email: digital.robertson@gmail.com
62
62
  executables: []
63
63
  extensions: []
64
64
  extra_rdoc_files: []
@@ -83,8 +83,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
83
83
  - !ruby/object:Gem::Version
84
84
  version: '0'
85
85
  requirements: []
86
- rubygems_version: 3.0.1
86
+ rubyforge_project:
87
+ rubygems_version: 2.7.10
87
88
  signing_key:
88
89
  specification_version: 4
89
- summary: Search title entries from plain text (derived from a Dynarex document).
90
+ summary: Originally intended to search title entries from plain text (derived from
91
+ a Dynarex document).
90
92
  test_files: []
metadata.gz.sig CHANGED
Binary file