enumark 0.1.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -0
- data/README.md +26 -1
- data/enumark.gemspec +1 -1
- data/lib/enumark/category.rb +21 -0
- data/lib/enumark/config.rb +17 -0
- data/lib/enumark/dir.rb +42 -0
- data/lib/enumark/grouping.rb +29 -0
- data/lib/enumark/item.rb +45 -0
- data/lib/enumark/version.rb +1 -1
- data/lib/enumark.rb +41 -104
- metadata +10 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fa3a4cc7fa496b1863e6de8c004144d85d983e1b8886f252c909c1031b215f66
|
4
|
+
data.tar.gz: a462ef2d2b0ee84b4330e6daa2b528bbf1e643181715ff8bfb0030c2fabc6207
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0712afefeb2f3950b7763cb4d2246e39034a393bf8beb75bc88e10c22f6d45122fcf19f38cda647f4f3ec441705648b46d88fc729f3162cc2cfc50bb66bbd1a3
|
7
|
+
data.tar.gz: 482ce353a24835363a27a64b15e0d74e08e352ab57cc5e19b0008f3bfa0dabbeeed195b669f006d212bc76fcd8a604d8e16bb077d9e1fb85fcf012eb52ba348c
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
3.1.1
|
data/README.md
CHANGED
@@ -27,6 +27,8 @@ enum = Enumark.new('/path/to/bookmark_dump_file')
|
|
27
27
|
|
28
28
|
enum.each do |item|
|
29
29
|
item.name
|
30
|
+
item.dump_date
|
31
|
+
item.add_date
|
30
32
|
item.href
|
31
33
|
item.host
|
32
34
|
item.categories
|
@@ -46,6 +48,29 @@ enum.each_dup_href do |href|
|
|
46
48
|
href.name
|
47
49
|
href.items
|
48
50
|
end
|
51
|
+
|
52
|
+
enum.each_category do |cate|
|
53
|
+
cate.name
|
54
|
+
cate.items
|
55
|
+
end
|
56
|
+
```
|
57
|
+
|
58
|
+
Explore trends of your dump files:
|
59
|
+
|
60
|
+
```ruby
|
61
|
+
dir = Enumark::Dir.new('/path/to/directory_with_bookmark_dump_files_more_than_one')
|
62
|
+
|
63
|
+
dir.added # select items in last file but not in second to last
|
64
|
+
dir.deleted # reject items in last file
|
65
|
+
dir.uniq # union all items
|
66
|
+
dir.static # select items appear in all files
|
67
|
+
dir.all # enumerator of all items in all files
|
68
|
+
```
|
69
|
+
|
70
|
+
Set config
|
71
|
+
|
72
|
+
```ruby
|
73
|
+
Enumark::Config.set(logger: STDOUT)
|
49
74
|
```
|
50
75
|
|
51
76
|
## Development
|
@@ -56,7 +81,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
56
81
|
|
57
82
|
## Contributing
|
58
83
|
|
59
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
84
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/turnon/enumark. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/[USERNAME]/enumark/blob/master/CODE_OF_CONDUCT.md).
|
60
85
|
|
61
86
|
## License
|
62
87
|
|
data/enumark.gemspec
CHANGED
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
|
|
11
11
|
spec.summary = "Enumerate chrome bookmark dump file"
|
12
12
|
spec.homepage = "https://github.com/turnon/enumark"
|
13
13
|
spec.license = "MIT"
|
14
|
-
spec.required_ruby_version = Gem::Requirement.new(">=
|
14
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 3.1.0")
|
15
15
|
|
16
16
|
# Specify which files should be added to the gem when it is released.
|
17
17
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
data/lib/enumark/category.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class Enumark
|
4
|
+
class Category
|
5
|
+
|
6
|
+
START = /^\s.*<DT><H3/
|
7
|
+
ENDIND = /^\s.*<\/DL><p>/
|
8
|
+
PATTERN = /ADD_DATE="(.*?)".*LAST_MODIFIED="(.*?)".*>(.*)<\/H3/
|
9
|
+
|
10
|
+
attr_reader :name
|
11
|
+
alias_method :inspect, :name
|
12
|
+
alias_method :to_s, :name
|
13
|
+
|
14
|
+
def initialize(line)
|
15
|
+
m = line.match(PATTERN)
|
16
|
+
@add_date = m[1]
|
17
|
+
@last_mod = m[2]
|
18
|
+
@name = m[3]
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/lib/enumark/dir.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class Enumark
|
4
|
+
class Dir
|
5
|
+
def initialize(dir)
|
6
|
+
@enumarks = ::Dir.glob(dir).map{ |f| ::Enumark.new(f) }
|
7
|
+
raise 'Not enough to process' if @enumarks.count <= 1
|
8
|
+
end
|
9
|
+
|
10
|
+
def added
|
11
|
+
@added ||= (@enumarks[-1] - @enumarks[-2])
|
12
|
+
end
|
13
|
+
|
14
|
+
def deleted
|
15
|
+
@deleted ||= @enumarks[0..-2].reverse_each.reduce(&:|) - @enumarks[-1]
|
16
|
+
end
|
17
|
+
|
18
|
+
def uniq
|
19
|
+
@uniq ||= @enumarks.reverse_each.reduce(&:|)
|
20
|
+
end
|
21
|
+
|
22
|
+
def static
|
23
|
+
@static ||= @enumarks.reverse_each.reduce(&:&)
|
24
|
+
end
|
25
|
+
|
26
|
+
def all
|
27
|
+
Enumerator.new do |yielder|
|
28
|
+
logger = Config.get(:logger)
|
29
|
+
file_count = @enumarks.count
|
30
|
+
|
31
|
+
@enumarks.each_with_index do |enum, idx|
|
32
|
+
enum.each do |item|
|
33
|
+
yielder << item
|
34
|
+
logger.printf("--> %6d/%-6d = %3f \r", idx + 1, file_count, ((idx + 1).to_f / file_count * 100).round(2)) if logger
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
logger.puts if logger
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
data/lib/enumark/grouping.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class Enumark
|
4
|
+
|
5
|
+
class Grouping
|
6
|
+
Group = Struct.new(:name, :items)
|
7
|
+
|
8
|
+
def initialize(enumark, key, &post)
|
9
|
+
@lock = Mutex.new
|
10
|
+
@collection = nil
|
11
|
+
|
12
|
+
@enumark = enumark
|
13
|
+
@key = key
|
14
|
+
@post = post
|
15
|
+
end
|
16
|
+
|
17
|
+
def each(&block)
|
18
|
+
unless @collection
|
19
|
+
@lock.synchronize do
|
20
|
+
@collection = @enumark.group_by(&@key)
|
21
|
+
@collection = @post.call(@collection) if @post
|
22
|
+
@collection = @collection.map{ |k, items| Group.new(k, items) }
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
@collection.each(&block)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
data/lib/enumark/item.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class Enumark
|
4
|
+
class Item
|
5
|
+
|
6
|
+
PREFIX = /^\s.*<DT><A/
|
7
|
+
PATTERN = /HREF="(.*?)".*ADD_DATE="(.*?)".*>(.*)<\/A>/
|
8
|
+
USELESS_SHARP = /\#.*$/
|
9
|
+
|
10
|
+
attr_reader :dump_date, :name, :href, :add_date, :categories
|
11
|
+
|
12
|
+
def initialize(dump_date, line, categories)
|
13
|
+
@dump_date = dump_date
|
14
|
+
m = line.match(PATTERN)
|
15
|
+
@href = m[1].gsub(USELESS_SHARP, '')
|
16
|
+
@add_date = Time.at(m[2].to_i)
|
17
|
+
@name = m[3]
|
18
|
+
@categories = categories
|
19
|
+
end
|
20
|
+
|
21
|
+
def inspect
|
22
|
+
@inspect ||= "#{add_date.strftime('%F %T')} #{categories_str}> #{name}"
|
23
|
+
end
|
24
|
+
|
25
|
+
def categories_str
|
26
|
+
@categories_str ||= "/#{categories.join('/')}"
|
27
|
+
end
|
28
|
+
|
29
|
+
def to_s
|
30
|
+
inspect
|
31
|
+
end
|
32
|
+
|
33
|
+
def hash
|
34
|
+
href.hash
|
35
|
+
end
|
36
|
+
|
37
|
+
def eql?(another)
|
38
|
+
href.eql?(another.href)
|
39
|
+
end
|
40
|
+
|
41
|
+
def host
|
42
|
+
@host ||= (URI.parse(href).host rescue 'unknown')
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
data/lib/enumark/version.rb
CHANGED
data/lib/enumark.rb
CHANGED
@@ -1,115 +1,29 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative "enumark/version"
|
4
|
+
require 'enumark/item'
|
5
|
+
require 'enumark/category'
|
6
|
+
require 'enumark/grouping'
|
7
|
+
require 'enumark/dir'
|
8
|
+
require 'enumark/config'
|
4
9
|
|
5
10
|
class Enumark
|
6
11
|
include Enumerable
|
7
12
|
|
8
|
-
|
9
|
-
CATEGORY_END = /^\s.*<\/DL><p>/
|
10
|
-
CATEGORY_NAME = /ADD_DATE="(.*?)".*LAST_MODIFIED="(.*?)".*>(.*)<\/H3/
|
11
|
-
|
12
|
-
ITEM_PREFIX = /^\s.*<DT><A/
|
13
|
-
ITEM_NAME = /HREF="(.*?)".*ADD_DATE="(.*?)".*>(.*)<\/A>/
|
14
|
-
|
15
|
-
class Category
|
16
|
-
attr_reader :name
|
17
|
-
alias_method :inspect, :name
|
18
|
-
alias_method :to_s, :name
|
19
|
-
|
20
|
-
def initialize(line)
|
21
|
-
m = line.match(CATEGORY_NAME)
|
22
|
-
@add_date = m[1]
|
23
|
-
@last_mod = m[2]
|
24
|
-
@name = m[3]
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
class Item
|
29
|
-
attr_reader :name, :href, :categories
|
30
|
-
|
31
|
-
USELESS_SHARP = /\#.*$/
|
32
|
-
|
33
|
-
def initialize(line, categories)
|
34
|
-
m = line.match(ITEM_NAME)
|
35
|
-
@href = m[1].gsub(USELESS_SHARP, '')
|
36
|
-
@add_date = m[2]
|
37
|
-
@name = m[3]
|
38
|
-
@categories = categories
|
39
|
-
end
|
40
|
-
|
41
|
-
def inspect
|
42
|
-
@inspect ||= "/#{categories.join('/')}> #{name}"
|
43
|
-
end
|
44
|
-
|
45
|
-
def to_s
|
46
|
-
inspect
|
47
|
-
end
|
48
|
-
|
49
|
-
def host
|
50
|
-
@host ||= (URI.parse(href).host rescue 'unknown')
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
class Hostname
|
55
|
-
attr_reader :name, :items
|
56
|
-
|
57
|
-
def initialize(name)
|
58
|
-
@name = name
|
59
|
-
@items = []
|
60
|
-
end
|
61
|
-
|
62
|
-
def add(item)
|
63
|
-
@items << item
|
64
|
-
end
|
65
|
-
|
66
|
-
def inspect
|
67
|
-
@name
|
68
|
-
end
|
69
|
-
|
70
|
-
def to
|
71
|
-
inspect
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
class Grouping
|
76
|
-
Group = Struct.new(:name, :items)
|
77
|
-
|
78
|
-
def initialize(enumark, key, &post)
|
79
|
-
@lock = Mutex.new
|
80
|
-
@collection = nil
|
81
|
-
|
82
|
-
@enumark = enumark
|
83
|
-
@key = key
|
84
|
-
@post = post
|
85
|
-
end
|
86
|
-
|
87
|
-
def each(&block)
|
88
|
-
unless @collection
|
89
|
-
@lock.synchronize do
|
90
|
-
@collection = @enumark.group_by(&@key)
|
91
|
-
@collection = @post.call(@collection) if @post
|
92
|
-
@collection = @collection.map{ |k, items| Group.new(k, items) }
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
@collection.each(&block)
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
def initialize(file)
|
13
|
+
def initialize(file, items: nil)
|
101
14
|
@file = file
|
102
15
|
@lock = Mutex.new
|
103
|
-
@
|
104
|
-
@items = []
|
16
|
+
@items = items
|
105
17
|
|
106
18
|
@hosts = Grouping.new(self, :host)
|
107
19
|
@dup_titles = Grouping.new(self, :name){ |groups| groups.select{ |_, items| items.count > 1 } }
|
108
20
|
@dup_hrefs = Grouping.new(self, :href){ |groups| groups.select{ |_, items| items.count > 1 } }
|
21
|
+
@cates = Grouping.new(self, :categories_str)
|
109
22
|
end
|
110
23
|
|
111
24
|
def each(&block)
|
112
25
|
read_all_lines
|
26
|
+
sort_by_add_date!
|
113
27
|
@items.each(&block)
|
114
28
|
end
|
115
29
|
|
@@ -125,32 +39,55 @@ class Enumark
|
|
125
39
|
@dup_hrefs.each(&block)
|
126
40
|
end
|
127
41
|
|
42
|
+
def each_category(&block)
|
43
|
+
@cates.each(&block)
|
44
|
+
end
|
45
|
+
|
46
|
+
[:+ ,:-, :&, :|].each do |op|
|
47
|
+
class_eval <<-EOM
|
48
|
+
def #{op}(another)
|
49
|
+
new_items = self.to_a #{op} another.to_a
|
50
|
+
Enumark.new(nil, items: new_items)
|
51
|
+
end
|
52
|
+
EOM
|
53
|
+
end
|
54
|
+
|
128
55
|
private
|
129
56
|
|
130
57
|
def read_all_lines
|
131
|
-
return if @read
|
58
|
+
return if @items
|
132
59
|
|
133
60
|
@lock.synchronize do
|
134
|
-
|
135
|
-
|
136
|
-
_read_all_lines
|
137
|
-
@read = true
|
61
|
+
_read_all_lines unless @items
|
138
62
|
end
|
139
63
|
end
|
140
64
|
|
141
65
|
def _read_all_lines
|
66
|
+
dump_date = Pathname.new(@file).basename('.html').to_s
|
142
67
|
categories = []
|
68
|
+
@items = []
|
143
69
|
|
144
70
|
File.new(@file).each do |line|
|
145
71
|
case line
|
146
|
-
when ITEM_PREFIX
|
147
|
-
item = Item.new(line, categories.dup)
|
72
|
+
when Item::PREFIX
|
73
|
+
item = Item.new(dump_date, line, categories.dup)
|
148
74
|
@items.push(item)
|
149
|
-
when
|
75
|
+
when Category::START
|
150
76
|
categories.push(Category.new(line))
|
151
|
-
when CATEGORY_END
|
77
|
+
when Category::ENDIND
|
152
78
|
categories.pop
|
153
79
|
end
|
154
80
|
end
|
155
81
|
end
|
82
|
+
|
83
|
+
def sort_by_add_date!
|
84
|
+
return if @sorted
|
85
|
+
|
86
|
+
@lock.synchronize do
|
87
|
+
next if @sorted
|
88
|
+
|
89
|
+
@items.sort!{ |i1, i2| i2.add_date <=> i1.add_date }
|
90
|
+
@sorted = true
|
91
|
+
end
|
92
|
+
end
|
156
93
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: enumark
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ken
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-03-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pry
|
@@ -32,6 +32,7 @@ extensions: []
|
|
32
32
|
extra_rdoc_files: []
|
33
33
|
files:
|
34
34
|
- ".gitignore"
|
35
|
+
- ".ruby-version"
|
35
36
|
- CODE_OF_CONDUCT.md
|
36
37
|
- Gemfile
|
37
38
|
- LICENSE.txt
|
@@ -41,6 +42,11 @@ files:
|
|
41
42
|
- bin/setup
|
42
43
|
- enumark.gemspec
|
43
44
|
- lib/enumark.rb
|
45
|
+
- lib/enumark/category.rb
|
46
|
+
- lib/enumark/config.rb
|
47
|
+
- lib/enumark/dir.rb
|
48
|
+
- lib/enumark/grouping.rb
|
49
|
+
- lib/enumark/item.rb
|
44
50
|
- lib/enumark/version.rb
|
45
51
|
homepage: https://github.com/turnon/enumark
|
46
52
|
licenses:
|
@@ -54,14 +60,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
54
60
|
requirements:
|
55
61
|
- - ">="
|
56
62
|
- !ruby/object:Gem::Version
|
57
|
-
version:
|
63
|
+
version: 3.1.0
|
58
64
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
59
65
|
requirements:
|
60
66
|
- - ">="
|
61
67
|
- !ruby/object:Gem::Version
|
62
68
|
version: '0'
|
63
69
|
requirements: []
|
64
|
-
rubygems_version: 3.
|
70
|
+
rubygems_version: 3.3.7
|
65
71
|
signing_key:
|
66
72
|
specification_version: 4
|
67
73
|
summary: Enumerate chrome bookmark dump file
|