enumark 0.1.2 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -0
- data/README.md +26 -1
- data/enumark.gemspec +1 -1
- data/lib/enumark/category.rb +21 -0
- data/lib/enumark/config.rb +17 -0
- data/lib/enumark/dir.rb +42 -0
- data/lib/enumark/grouping.rb +29 -0
- data/lib/enumark/item.rb +45 -0
- data/lib/enumark/version.rb +1 -1
- data/lib/enumark.rb +41 -104
- metadata +10 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fa3a4cc7fa496b1863e6de8c004144d85d983e1b8886f252c909c1031b215f66
|
4
|
+
data.tar.gz: a462ef2d2b0ee84b4330e6daa2b528bbf1e643181715ff8bfb0030c2fabc6207
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0712afefeb2f3950b7763cb4d2246e39034a393bf8beb75bc88e10c22f6d45122fcf19f38cda647f4f3ec441705648b46d88fc729f3162cc2cfc50bb66bbd1a3
|
7
|
+
data.tar.gz: 482ce353a24835363a27a64b15e0d74e08e352ab57cc5e19b0008f3bfa0dabbeeed195b669f006d212bc76fcd8a604d8e16bb077d9e1fb85fcf012eb52ba348c
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
3.1.1
|
data/README.md
CHANGED
@@ -27,6 +27,8 @@ enum = Enumark.new('/path/to/bookmark_dump_file')
|
|
27
27
|
|
28
28
|
enum.each do |item|
|
29
29
|
item.name
|
30
|
+
item.dump_date
|
31
|
+
item.add_date
|
30
32
|
item.href
|
31
33
|
item.host
|
32
34
|
item.categories
|
@@ -46,6 +48,29 @@ enum.each_dup_href do |href|
|
|
46
48
|
href.name
|
47
49
|
href.items
|
48
50
|
end
|
51
|
+
|
52
|
+
enum.each_category do |cate|
|
53
|
+
cate.name
|
54
|
+
cate.items
|
55
|
+
end
|
56
|
+
```
|
57
|
+
|
58
|
+
Explore trends of your dump files:
|
59
|
+
|
60
|
+
```ruby
|
61
|
+
dir = Enumark::Dir.new('/path/to/directory_with_bookmark_dump_files_more_than_one')
|
62
|
+
|
63
|
+
dir.added # select items in last file but not in second to last
|
64
|
+
dir.deleted # select items in any earlier file that are missing from the last file
|
65
|
+
dir.uniq # union all items
|
66
|
+
dir.static # select items that appear in all files
|
67
|
+
dir.all # enumerator of all items in all files
|
68
|
+
```
|
69
|
+
|
70
|
+
Set config
|
71
|
+
|
72
|
+
```ruby
|
73
|
+
Enumark::Config.set(logger: STDOUT)
|
49
74
|
```
|
50
75
|
|
51
76
|
## Development
|
@@ -56,7 +81,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
56
81
|
|
57
82
|
## Contributing
|
58
83
|
|
59
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
84
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/turnon/enumark. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/turnon/enumark/blob/master/CODE_OF_CONDUCT.md).
|
60
85
|
|
61
86
|
## License
|
62
87
|
|
data/enumark.gemspec
CHANGED
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
|
|
11
11
|
spec.summary = "Enumerate chrome bookmark dump file"
|
12
12
|
spec.homepage = "https://github.com/turnon/enumark"
|
13
13
|
spec.license = "MIT"
|
14
|
-
spec.required_ruby_version = Gem::Requirement.new(">=
|
14
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 3.1.0")
|
15
15
|
|
16
16
|
# Specify which files should be added to the gem when it is released.
|
17
17
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class Enumark
  # A bookmark folder, parsed from a single `<DT><H3 ...>NAME</H3>` line of a
  # bookmark dump file.
  class Category
    # Matches a line that opens a folder.
    START = /^\s.*<DT><H3/
    # Matches a line that closes a folder.
    ENDIND = /^\s.*<\/DL><p>/
    # Captures: 1 = ADD_DATE, 2 = LAST_MODIFIED (nil when the attribute is
    # absent), 3 = folder name.
    PATTERN = /ADD_DATE="(.*?)"(?:.*LAST_MODIFIED="(.*?)")?.*>(.*)<\/H3/

    # @return [String] folder name
    attr_reader :name
    # @return [String] raw ADD_DATE attribute value, exactly as written in the dump
    attr_reader :add_date
    # @return [String, nil] raw LAST_MODIFIED attribute value, nil when absent
    attr_reader :last_mod

    alias_method :inspect, :name
    alias_method :to_s, :name

    # @param line [String] a folder-opening line from the dump
    # @raise [ArgumentError] when the line cannot be parsed as a folder header
    def initialize(line)
      m = line.match(PATTERN)
      # Fail with a message that names the offending line instead of a
      # NoMethodError on nil further down.
      raise ArgumentError, "not a bookmark folder line: #{line.inspect}" unless m

      @add_date, @last_mod, @name = m.captures
    end
  end
end
|
data/lib/enumark/dir.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class Enumark
  # Compares several bookmark dump files with each other.
  #
  # Every matched file is wrapped in an Enumark; the set-like results below are
  # produced by Enumark's own `-`, `|` and `&` operators. Files are ordered by
  # path, so "last" means the path that sorts last.
  # NOTE(review): this presumes file names sort chronologically — confirm.
  class Dir
    # @param dir [String] a directory containing `*.html` dump files, or a
    #   glob pattern that selects the dump files directly
    # @raise [RuntimeError] when fewer than two files are found
    def initialize(dir)
      # Globbing a bare directory path yields only the directory itself, which
      # made the documented `Dir.new('/path/to/directory')` call always fail.
      pattern = File.directory?(dir) ? File.join(dir, '*.html') : dir
      @enumarks = ::Dir.glob(pattern).sort.map { |f| ::Enumark.new(f) }
      raise 'Not enough to process' if @enumarks.count <= 1
    end

    # @return [Enumark] items in the last file that are not in the second to last
    def added
      @added ||= (@enumarks[-1] - @enumarks[-2])
    end

    # @return [Enumark] items found in any earlier file but not in the last one
    def deleted
      @deleted ||= @enumarks[0..-2].reverse_each.reduce(&:|) - @enumarks[-1]
    end

    # @return [Enumark] union of the items of all files
    def uniq
      @uniq ||= @enumarks.reverse_each.reduce(&:|)
    end

    # @return [Enumark] items that appear in every file
    def static
      @static ||= @enumarks.reverse_each.reduce(&:&)
    end

    # Walks every item of every file, in file order. When a logger is
    # configured, a progress line is written after each file is finished and a
    # newline once all files are done.
    #
    # @return [Enumerator] lazily yields each item
    def all
      Enumerator.new do |yielder|
        logger = Config.get(:logger)
        file_count = @enumarks.count

        @enumarks.each.with_index(1) do |enum, nth|
          enum.each { |item| yielder << item }

          # The progress values only change per file, so report once per file
          # rather than once per item.
          next unless logger

          logger.printf("--> %6d/%-6d = %3f \r", nth, file_count, (nth.to_f / file_count * 100).round(2))
        end

        logger.puts if logger
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class Enumark
  # Lazily groups the items of an enumerable by one of their attributes and
  # remembers the result for later iterations.
  class Grouping
    # One group: the shared key value and the items that have it.
    Group = Struct.new(:name, :items)

    # @param enumark [#group_by] source of items
    # @param key [Symbol] name of the item method whose value is the group key
    # @yieldparam groups [Hash] key => items, as returned by `group_by`
    # @yieldreturn [Hash, Array] the groups to keep (e.g. only duplicates)
    def initialize(enumark, key, &post)
      @lock = Mutex.new
      @collection = nil

      @enumark = enumark
      @key = key
      @post = post
    end

    # Yields each Group; the grouping is computed on the first call only.
    #
    # @yieldparam group [Group]
    def each(&block)
      groups.each(&block)
    end

    private

    # @collection is assigned exactly once, with the finished list, and is
    # re-checked under the lock. The unlocked fast path can therefore never
    # observe a half-built value, and concurrent first callers compute it once.
    def groups
      @collection || @lock.synchronize { @collection ||= build_groups }
    end

    # Groups the items, applies the optional post-filter, wraps pairs in Group.
    def build_groups
      grouped = @enumark.group_by(&@key)
      grouped = @post.call(grouped) if @post
      grouped.map { |name, items| Group.new(name, items) }
    end
  end
end
|
data/lib/enumark/item.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Item#host relies on URI; load it here so it does not depend on some other
# library having required it first.
require 'uri'

class Enumark
  # A single bookmark, parsed from one `<DT><A ...>NAME</A>` line of a
  # bookmark dump file. Two items are considered the same bookmark when their
  # hrefs (with the `#fragment` removed) are equal.
  class Item
    # Matches a line that holds a bookmark.
    PREFIX = /^\s.*<DT><A/
    # Captures: 1 = HREF, 2 = ADD_DATE, 3 = bookmark title.
    PATTERN = /HREF="(.*?)".*ADD_DATE="(.*?)".*>(.*)<\/A>/
    # The `#fragment` part of a URL, dropped before hrefs are stored.
    USELESS_SHARP = /\#.*$/

    attr_reader :dump_date, :name, :href, :add_date, :categories

    # @param dump_date [Object] identifies the dump file the line came from
    # @param line [String] a bookmark line from the dump
    # @param categories [Array] enclosing folders, outermost first
    # @raise [ArgumentError] when the line cannot be parsed as a bookmark
    def initialize(dump_date, line, categories)
      m = line.match(PATTERN)
      # Fail with a message that names the offending line instead of a
      # NoMethodError on nil further down.
      raise ArgumentError, "not a bookmark line: #{line.inspect}" unless m

      @dump_date = dump_date
      @href = m[1].gsub(USELESS_SHARP, '')
      @add_date = Time.at(m[2].to_i) # ADD_DATE is read as seconds since the epoch
      @name = m[3]
      @categories = categories
    end

    # @return [String] "<add date> /<folder path>> <title>"
    def inspect
      @inspect ||= "#{add_date.strftime('%F %T')} #{categories_str}> #{name}"
    end

    # @return [String] folder path, e.g. "/Bar/Dev"
    def categories_str
      @categories_str ||= "/#{categories.join('/')}"
    end

    def to_s
      inspect
    end

    # Hash and equality are both based on href so that Array#-, #| and #&
    # treat the same URL from different dumps as one bookmark.
    def hash
      href.hash
    end

    # @return [Boolean] true when +another+ is an Item with the same href
    def eql?(another)
      another.is_a?(Item) && href.eql?(another.href)
    end
    alias_method :==, :eql?

    # @return [String, nil] host of the href; 'unknown' when the href is not a
    #   parseable URI; nil when the URI has no host part
    def host
      return @host if defined?(@host)

      @host = begin
        URI.parse(href).host
      rescue URI::InvalidURIError
        'unknown'
      end
    end
  end
end
|
data/lib/enumark/version.rb
CHANGED
data/lib/enumark.rb
CHANGED
@@ -1,115 +1,29 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative "enumark/version"
|
4
|
+
require 'enumark/item'
|
5
|
+
require 'enumark/category'
|
6
|
+
require 'enumark/grouping'
|
7
|
+
require 'enumark/dir'
|
8
|
+
require 'enumark/config'
|
4
9
|
|
5
10
|
class Enumark
|
6
11
|
include Enumerable
|
7
12
|
|
8
|
-
|
9
|
-
CATEGORY_END = /^\s.*<\/DL><p>/
|
10
|
-
CATEGORY_NAME = /ADD_DATE="(.*?)".*LAST_MODIFIED="(.*?)".*>(.*)<\/H3/
|
11
|
-
|
12
|
-
ITEM_PREFIX = /^\s.*<DT><A/
|
13
|
-
ITEM_NAME = /HREF="(.*?)".*ADD_DATE="(.*?)".*>(.*)<\/A>/
|
14
|
-
|
15
|
-
class Category
|
16
|
-
attr_reader :name
|
17
|
-
alias_method :inspect, :name
|
18
|
-
alias_method :to_s, :name
|
19
|
-
|
20
|
-
def initialize(line)
|
21
|
-
m = line.match(CATEGORY_NAME)
|
22
|
-
@add_date = m[1]
|
23
|
-
@last_mod = m[2]
|
24
|
-
@name = m[3]
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
class Item
|
29
|
-
attr_reader :name, :href, :categories
|
30
|
-
|
31
|
-
USELESS_SHARP = /\#.*$/
|
32
|
-
|
33
|
-
def initialize(line, categories)
|
34
|
-
m = line.match(ITEM_NAME)
|
35
|
-
@href = m[1].gsub(USELESS_SHARP, '')
|
36
|
-
@add_date = m[2]
|
37
|
-
@name = m[3]
|
38
|
-
@categories = categories
|
39
|
-
end
|
40
|
-
|
41
|
-
def inspect
|
42
|
-
@inspect ||= "/#{categories.join('/')}> #{name}"
|
43
|
-
end
|
44
|
-
|
45
|
-
def to_s
|
46
|
-
inspect
|
47
|
-
end
|
48
|
-
|
49
|
-
def host
|
50
|
-
@host ||= (URI.parse(href).host rescue 'unknown')
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
class Hostname
|
55
|
-
attr_reader :name, :items
|
56
|
-
|
57
|
-
def initialize(name)
|
58
|
-
@name = name
|
59
|
-
@items = []
|
60
|
-
end
|
61
|
-
|
62
|
-
def add(item)
|
63
|
-
@items << item
|
64
|
-
end
|
65
|
-
|
66
|
-
def inspect
|
67
|
-
@name
|
68
|
-
end
|
69
|
-
|
70
|
-
def to
|
71
|
-
inspect
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
class Grouping
|
76
|
-
Group = Struct.new(:name, :items)
|
77
|
-
|
78
|
-
def initialize(enumark, key, &post)
|
79
|
-
@lock = Mutex.new
|
80
|
-
@collection = nil
|
81
|
-
|
82
|
-
@enumark = enumark
|
83
|
-
@key = key
|
84
|
-
@post = post
|
85
|
-
end
|
86
|
-
|
87
|
-
def each(&block)
|
88
|
-
unless @collection
|
89
|
-
@lock.synchronize do
|
90
|
-
@collection = @enumark.group_by(&@key)
|
91
|
-
@collection = @post.call(@collection) if @post
|
92
|
-
@collection = @collection.map{ |k, items| Group.new(k, items) }
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
@collection.each(&block)
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
def initialize(file)
|
13
|
+
def initialize(file, items: nil)
|
101
14
|
@file = file
|
102
15
|
@lock = Mutex.new
|
103
|
-
@
|
104
|
-
@items = []
|
16
|
+
@items = items
|
105
17
|
|
106
18
|
@hosts = Grouping.new(self, :host)
|
107
19
|
@dup_titles = Grouping.new(self, :name){ |groups| groups.select{ |_, items| items.count > 1 } }
|
108
20
|
@dup_hrefs = Grouping.new(self, :href){ |groups| groups.select{ |_, items| items.count > 1 } }
|
21
|
+
@cates = Grouping.new(self, :categories_str)
|
109
22
|
end
|
110
23
|
|
111
24
|
def each(&block)
|
112
25
|
read_all_lines
|
26
|
+
sort_by_add_date!
|
113
27
|
@items.each(&block)
|
114
28
|
end
|
115
29
|
|
@@ -125,32 +39,55 @@ class Enumark
|
|
125
39
|
@dup_hrefs.each(&block)
|
126
40
|
end
|
127
41
|
|
42
|
+
def each_category(&block)
|
43
|
+
@cates.each(&block)
|
44
|
+
end
|
45
|
+
|
46
|
+
[:+ ,:-, :&, :|].each do |op|
|
47
|
+
class_eval <<-EOM
|
48
|
+
def #{op}(another)
|
49
|
+
new_items = self.to_a #{op} another.to_a
|
50
|
+
Enumark.new(nil, items: new_items)
|
51
|
+
end
|
52
|
+
EOM
|
53
|
+
end
|
54
|
+
|
128
55
|
private
|
129
56
|
|
130
57
|
def read_all_lines
|
131
|
-
return if @
|
58
|
+
return if @items
|
132
59
|
|
133
60
|
@lock.synchronize do
|
134
|
-
|
135
|
-
|
136
|
-
_read_all_lines
|
137
|
-
@read = true
|
61
|
+
_read_all_lines unless @items
|
138
62
|
end
|
139
63
|
end
|
140
64
|
|
141
65
|
def _read_all_lines
|
66
|
+
dump_date = Pathname.new(@file).basename('.html').to_s
|
142
67
|
categories = []
|
68
|
+
@items = []
|
143
69
|
|
144
70
|
File.new(@file).each do |line|
|
145
71
|
case line
|
146
|
-
when
|
147
|
-
item = Item.new(line, categories.dup)
|
72
|
+
when Item::PREFIX
|
73
|
+
item = Item.new(dump_date, line, categories.dup)
|
148
74
|
@items.push(item)
|
149
|
-
when
|
75
|
+
when Category::START
|
150
76
|
categories.push(Category.new(line))
|
151
|
-
when
|
77
|
+
when Category::ENDIND
|
152
78
|
categories.pop
|
153
79
|
end
|
154
80
|
end
|
155
81
|
end
|
82
|
+
|
83
|
+
def sort_by_add_date!
|
84
|
+
return if @sorted
|
85
|
+
|
86
|
+
@lock.synchronize do
|
87
|
+
next if @sorted
|
88
|
+
|
89
|
+
@items.sort!{ |i1, i2| i2.add_date <=> i1.add_date }
|
90
|
+
@sorted = true
|
91
|
+
end
|
92
|
+
end
|
156
93
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: enumark
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ken
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-03-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pry
|
@@ -32,6 +32,7 @@ extensions: []
|
|
32
32
|
extra_rdoc_files: []
|
33
33
|
files:
|
34
34
|
- ".gitignore"
|
35
|
+
- ".ruby-version"
|
35
36
|
- CODE_OF_CONDUCT.md
|
36
37
|
- Gemfile
|
37
38
|
- LICENSE.txt
|
@@ -41,6 +42,11 @@ files:
|
|
41
42
|
- bin/setup
|
42
43
|
- enumark.gemspec
|
43
44
|
- lib/enumark.rb
|
45
|
+
- lib/enumark/category.rb
|
46
|
+
- lib/enumark/config.rb
|
47
|
+
- lib/enumark/dir.rb
|
48
|
+
- lib/enumark/grouping.rb
|
49
|
+
- lib/enumark/item.rb
|
44
50
|
- lib/enumark/version.rb
|
45
51
|
homepage: https://github.com/turnon/enumark
|
46
52
|
licenses:
|
@@ -54,14 +60,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
54
60
|
requirements:
|
55
61
|
- - ">="
|
56
62
|
- !ruby/object:Gem::Version
|
57
|
-
version:
|
63
|
+
version: 3.1.0
|
58
64
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
59
65
|
requirements:
|
60
66
|
- - ">="
|
61
67
|
- !ruby/object:Gem::Version
|
62
68
|
version: '0'
|
63
69
|
requirements: []
|
64
|
-
rubygems_version: 3.
|
70
|
+
rubygems_version: 3.3.7
|
65
71
|
signing_key:
|
66
72
|
specification_version: 4
|
67
73
|
summary: Enumerate chrome bookmark dump file
|