dxtitle_search 0.2.2 → 0.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ce1fbac8b03c6ed8f54bd015c10f8d0cff189489daef845e2ba7334d51c2f43a
4
- data.tar.gz: 1097753eb43800ed8a1361e605fd4e51b427aef0fffcd67649cec7b04a0c12b7
3
+ metadata.gz: 74ea6c681b08bebcc9fa822a185562a19a262544abf12e674a150428470c8396
4
+ data.tar.gz: 1ea0ffc0e631086452ef97e51b5e154f21169d4f4274f5538a4ce7c4e476b67b
5
5
  SHA512:
6
- metadata.gz: ef562657ad220e26e087f672a178475fb15c4e41874f7eacf270c1872e0461763a2b145b509541323467c48ad02e74c91b04d0c14cd15e515a22dd08de90644c
7
- data.tar.gz: 812c6c680a264edd2c0ae95fb63b86fb66485642ddb7e2d41696ac1ff51463e6e73cabd4c7fc29cfc8c9fcd9b6e49bb0374f1d1a2f6676e7bfc903afd515dedc
6
+ metadata.gz: 4e4e998c4df64f0e18ac5556ad197d13ecec2804c9abc7f4ded1a2c7a0a1e89f5c73a73da7ffd8ed418ee5f2eea23233b675eb79412cf68e09c3bb0e333d2def
7
+ data.tar.gz: 45fc402f35aa22ff4bc66ecf752f8bb946acfeea84106513b382ba921b41c5f68882eb5b415146e13f40be5b28bdb238dc49f68c3ecdd41e7aab5662af00df87
Binary file
data.tar.gz.sig CHANGED
Binary file
@@ -2,102 +2,54 @@
2
2
 
3
3
  # file: dxtitle_search.rb
4
4
 
5
- require 'dynarex'
5
+ require 'indexer101'
6
6
 
7
7
 
8
8
  class DxTitleSearch
9
9
 
10
- def initialize(obj=nil, sources: obj, debug: false)
10
+ def initialize(obj=nil, sources: nil, debug: false)
11
11
 
12
12
  @debug = debug
13
+ @indexer = Indexer101.new debug: debug
13
14
 
14
15
  s = if sources then
15
16
 
16
- dx = Dynarex.new(sources)
17
- dx.all.map {|x| read x.uri }.join
17
+ dx = Dynarex.new(sources)
18
+ @indexer.scan_dxindex dx.all.map(&:uri), level: 1
18
19
 
19
- elsif obj then
20
-
21
- # is it a Dynarex file location?
22
- if obj.lines.length < 2 then
20
+ elsif obj and obj.lines.length < 2
23
21
 
24
- read obj
25
-
26
- else
27
-
28
- obj
22
+ @indexer.scan_dxindex obj, level: 1
29
23
 
30
- end
31
24
  end
32
25
 
33
- @h = h = s.lines.inject({}) do |r,x|
34
- key, value = x.split(/\s+(?=[^\s]+$)/,2)
35
- r.merge(key.rstrip => value)
36
- end
37
-
38
- @a = h.keys
26
+ @indexer.build
39
27
 
40
28
  end
41
29
 
42
- def search(keywords)
43
-
44
- phrases = @a.grep /#{keywords}/i
45
-
46
- # find out the keywords count for each entry found
47
- a0 = keywords.split.flat_map do |x|
48
- next if @a.length < 2
49
- @a.grep /#{x}/i
50
- end
51
-
52
- a = a0.uniq.map do |entry|
53
- [entry, entry.scan(/#{keywords.split.join('|')}/).uniq.count]
54
- end
55
-
56
- # sort by keywords found per entry and then date
57
- #a2 = (phrases + a).uniq.sort do |x, x2|
58
- a2 = a.sort do |x, x2|
59
- -([x.last, x.first[/^\d+/], ] <=> [x2.last, x2.first[/^\d+/]])
60
- end
30
+ def search(keywords)
61
31
 
32
+ a2 = @indexer.search keywords.split(/[\s:"!\?\(\)£]+(?=[\w#_'-]+)/)
62
33
  # format each result as a Hash object
63
- a3 = (phrases + a2).map do |x|
34
+ a3 = a2.map do |date, title, url|
64
35
 
65
- if x.length > 1 then
66
- line, _ = x
67
- else
68
- line = x
69
- end
70
-
71
- puts 'line: ' + line.inspect if @debug
72
-
73
- rawtime, title = line.split(/ +/,2)
74
- puts 'title: ' + title.inspect if @debug
75
-
76
- {title: title, url: @h[line].chomp, date: Time.at(rawtime.to_i)}
36
+ {title: title, url: url, date: date}
77
37
 
78
38
  end
79
39
 
80
40
  puts 'a3: ' + a3.inspect if @debug
41
+
42
+ def a3.to_dx()
43
+ Dynarex.new('results/result(title, url, date)').import(self)
44
+ end
81
45
 
82
46
  return a3
83
47
 
84
48
  end
85
49
 
86
- def tag_search(keywords)
87
- a = @a.flat_map {|x| x.split(/#/,2).last.split(/\s*#/)}
88
- a.grep(/^#{keywords}/i).map(&:downcase).uniq
89
- end
90
-
91
- private
92
-
93
- def read(source)
94
-
95
- dx = Dynarex.new(source)
96
-
97
- dx.all.map do |x|
98
- "%d %s %s" % [Time.parse(x.created).to_i, x.title, x.url]
99
- end.join("\n")
100
-
101
- end
50
+ def tag_search(keywords)
51
+ r = @indexer.lookup *keywords.split(/[\W]+(?=[\w]+)/).map {|x| "#" + x}
52
+ r.map {|x| x.to_s[1..-1]}
53
+ end
102
54
 
103
55
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dxtitle_search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,28 +35,28 @@ cert_chain:
35
35
  ODgOmcgAsTin0T9l0vWQJnj2OClh1pcC6tFnQXXcYuwTpbtM2v4OmUfUDvJmwQ42
36
36
  POydZUYZg49X9TlTDqPMKed0
37
37
  -----END CERTIFICATE-----
38
- date: 2019-08-11 00:00:00.000000000 Z
38
+ date: 2019-11-12 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
- name: dynarex
41
+ name: indexer101
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '1.8'
46
+ version: '0.2'
47
47
  - - ">="
48
48
  - !ruby/object:Gem::Version
49
- version: 1.8.19
49
+ version: 0.2.1
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - - "~>"
55
55
  - !ruby/object:Gem::Version
56
- version: '1.8'
56
+ version: '0.2'
57
57
  - - ">="
58
58
  - !ruby/object:Gem::Version
59
- version: 1.8.19
59
+ version: 0.2.1
60
60
  description:
61
61
  email: james@jamesrobertson.eu
62
62
  executables: []
@@ -83,8 +83,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
83
83
  - !ruby/object:Gem::Version
84
84
  version: '0'
85
85
  requirements: []
86
- rubygems_version: 3.0.1
86
+ rubygems_version: 3.0.3
87
87
  signing_key:
88
88
  specification_version: 4
89
- summary: Search title entries from plain text (derived from a Dynarex document).
89
+ summary: Originally intended to search title entries from plain text (derived from
90
+ a Dynarex document).
90
91
  test_files: []
metadata.gz.sig CHANGED
Binary file