enumark 0.1.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5ff7445e38a40b2f5e2e7196c9873833d1853c8994cf0ac13699c322174658ee
4
- data.tar.gz: 2d679d07bed90e0973701f6fd01a5a6b2c0c86bf61529a97d3ade1d57d68ca4c
3
+ metadata.gz: fa3a4cc7fa496b1863e6de8c004144d85d983e1b8886f252c909c1031b215f66
4
+ data.tar.gz: a462ef2d2b0ee84b4330e6daa2b528bbf1e643181715ff8bfb0030c2fabc6207
5
5
  SHA512:
6
- metadata.gz: 6ab4b15cf7de8e56c3af1e681f369258e9806b34736e2ffb5df85c4195a9be354c272d46e2627c5f1bf8bd69b26f9144ac1f46736835926f481c2a5bc0204a92
7
- data.tar.gz: c86177d92d36168653a669d5f11ed2136549026760c042a89ad385aece50bcc1faf69e269c30fcf2ae57d53b9df839db200fe3a0a7cd5f335b28463fdfc2a255
6
+ metadata.gz: 0712afefeb2f3950b7763cb4d2246e39034a393bf8beb75bc88e10c22f6d45122fcf19f38cda647f4f3ec441705648b46d88fc729f3162cc2cfc50bb66bbd1a3
7
+ data.tar.gz: 482ce353a24835363a27a64b15e0d74e08e352ab57cc5e19b0008f3bfa0dabbeeed195b669f006d212bc76fcd8a604d8e16bb077d9e1fb85fcf012eb52ba348c
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 3.1.1
data/README.md CHANGED
@@ -27,6 +27,8 @@ enum = Enumark.new('/path/to/bookmark_dump_file')
27
27
 
28
28
  enum.each do |item|
29
29
  item.name
30
+ item.dump_date
31
+ item.add_date
30
32
  item.href
31
33
  item.host
32
34
  item.categories
@@ -46,6 +48,29 @@ enum.each_dup_href do |href|
46
48
  href.name
47
49
  href.items
48
50
  end
51
+
52
+ enum.each_category do |cate|
53
+ cate.name
54
+ cate.items
55
+ end
56
+ ```
57
+
58
+ Explore trends of your dump files:
59
+
60
+ ```ruby
61
+ dir = Enumark::Dir.new('/path/to/directory_with_bookmark_dump_files_more_than_one')
62
+
63
+ dir.added # items in the last file that are not in the second-to-last file
64
+ dir.deleted # items in earlier files that are missing from the last file
65
+ dir.uniq # union all items
66
+ dir.static # items that appear in every file
67
+ dir.all # enumerator of all items in all files
68
+ ```
69
+
70
+ Set config
71
+
72
+ ```ruby
73
+ Enumark::Config.set(logger: STDOUT)
49
74
  ```
50
75
 
51
76
  ## Development
@@ -56,7 +81,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
56
81
 
57
82
  ## Contributing
58
83
 
59
- Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/enumark. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/[USERNAME]/enumark/blob/master/CODE_OF_CONDUCT.md).
84
+ Bug reports and pull requests are welcome on GitHub at https://github.com/turnon/enumark. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/turnon/enumark/blob/master/CODE_OF_CONDUCT.md).
60
85
 
61
86
  ## License
62
87
 
data/enumark.gemspec CHANGED
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
11
11
  spec.summary = "Enumerate chrome bookmark dump file"
12
12
  spec.homepage = "https://github.com/turnon/enumark"
13
13
  spec.license = "MIT"
14
- spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
14
+ spec.required_ruby_version = Gem::Requirement.new(">= 3.1.0")
15
15
 
16
16
  # Specify which files should be added to the gem when it is released.
17
17
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
class Enumark
  # A bookmark folder, built from a single `<DT><H3 ...>` line of a dump file.
  class Category
    # Matches an indented line containing a folder heading.
    START = /^\s.*<DT><H3/
    # Matches an indented line containing a closing `</DL><p>`.
    ENDIND = /^\s.*<\/DL><p>/
    # Captures, in order: ADD_DATE, LAST_MODIFIED and the heading text.
    PATTERN = /ADD_DATE="(.*?)".*LAST_MODIFIED="(.*?)".*>(.*)<\/H3/

    # @return [String] the folder title
    attr_reader :name

    # A category prints as its bare name.
    alias inspect name
    alias to_s name

    # @param line [String] a dump-file line matching PATTERN
    # @raise [NoMethodError] when the line does not match PATTERN
    def initialize(line)
      @add_date, @last_mod, @name = line.match(PATTERN).captures
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
class Enumark
  # Process-wide key/value settings, e.g. `Enumark::Config.set(logger: STDOUT)`.
  module Config
    # Backing store; kept if the file is loaded more than once.
    @cfg ||= {}

    # Merges the given settings into the current configuration.
    #
    # @param cfg [Hash] settings to add or overwrite
    # @return [Hash] the full configuration after merging
    def self.set(**cfg)
      (@cfg ||= {}).merge!(cfg)
    end

    # @param key [Symbol] setting name
    # @return [Object, nil] the stored value, or nil when the key is unset
    def self.get(key)
      @cfg[key]
    end
  end
end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
class Enumark
  # A series of bookmark dump files, compared with Enumark's set operators.
  # Files are taken in name order; "last" below means the last file in that
  # order.
  class Dir
    # @param dir [String] either a directory containing the dump files or a
    #   glob pattern selecting them
    # @raise [RuntimeError] when fewer than two files are found
    def initialize(dir)
      @enumarks = dump_files(dir).map { |f| ::Enumark.new(f) }
      raise 'Not enough to process' if @enumarks.count <= 1
    end

    # @return [Enumark] items in the last file that are not in the
    #   second-to-last file
    def added
      @added ||= (@enumarks[-1] - @enumarks[-2])
    end

    # @return [Enumark] items found in any earlier file but not in the last one
    def deleted
      @deleted ||= @enumarks[0..-2].reverse_each.reduce(&:|) - @enumarks[-1]
    end

    # @return [Enumark] union of the items of all files
    def uniq
      @uniq ||= @enumarks.reverse_each.reduce(&:|)
    end

    # @return [Enumark] items present in every file
    def static
      @static ||= @enumarks.reverse_each.reduce(&:&)
    end

    # Lazily walks every item of every file, in file order. When a :logger is
    # configured, a progress line is written after each file has been
    # consumed, followed by a newline once all files are done.
    #
    # @return [Enumerator]
    def all
      Enumerator.new do |yielder|
        logger = Config.get(:logger)
        file_count = @enumarks.count

        @enumarks.each_with_index do |enum, idx|
          enum.each { |item| yielder << item }
          next unless logger

          # The progress text only depends on the file index, so it is
          # emitted once per file instead of once per item.
          done = idx + 1
          logger.printf("--> %6d/%-6d = %.2f \r", done, file_count, done.to_f / file_count * 100)
        end

        logger.puts if logger
      end
    end

    private

    # Resolves the constructor argument to a list of file paths.
    #
    # A plain directory path is expanded to the regular files directly inside
    # it (globbing the bare path would only return the directory itself);
    # anything else is treated as a glob pattern, as before.
    def dump_files(dir)
      return ::Dir.glob(dir) unless File.directory?(dir)

      ::Dir.children(dir).sort
           .map { |name| File.join(dir, name) }
           .select { |path| File.file?(path) }
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
class Enumark
  # Lazily groups the items of an enumerable by one attribute and caches the
  # resulting list of groups.
  class Grouping
    Group = Struct.new(:name, :items)

    # @param enumark [#group_by] source of items
    # @param key [Symbol] name of the item method to group by
    # @param post [Proc, nil] optional filter; receives the Hash produced by
    #   group_by and returns the key/items pairs to keep
    def initialize(enumark, key, &post)
      @lock = Mutex.new
      @collection = nil

      @enumark = enumark
      @key = key
      @post = post
    end

    # Yields each Group; the groups are built on first use.
    #
    # @yieldparam group [Group]
    # @return [Array<Group>, Enumerator] whatever Array#each returns
    def each(&block)
      groups.each(&block)
    end

    private

    # Returns the cached groups, building them at most once. The check is
    # repeated inside the lock so a thread that waited does not rebuild, and
    # @collection is only ever assigned the finished list, never an
    # intermediate value.
    def groups
      @collection || @lock.synchronize { @collection ||= build_groups }
    end

    # Groups the source items, applies the optional filter and wraps each
    # key/items pair in a Group.
    def build_groups
      grouped = @enumark.group_by(&@key)
      grouped = @post.call(grouped) if @post
      grouped.map { |name, items| Group.new(name, items) }
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
# URI is used by Item#host; without this require the lookup would raise
# NameError unless something else had already loaded the library.
require 'uri'

class Enumark
  # One bookmark, built from a single `<DT><A ...>` line of a dump file.
  # Two items are interchangeable as Hash keys (and therefore for Array's
  # set operators) when their hrefs are equal.
  class Item
    # Matches an indented line containing a bookmark anchor.
    PREFIX = /^\s.*<DT><A/
    # Captures, in order: HREF, ADD_DATE and the link text.
    PATTERN = /HREF="(.*?)".*ADD_DATE="(.*?)".*>(.*)<\/A>/
    # Everything from the first '#' to the end of the line.
    USELESS_SHARP = /\#.*$/

    attr_reader :dump_date, :name, :href, :add_date, :categories

    # @param dump_date [Object] identifier of the dump the line came from
    # @param line [String] a dump-file line matching PATTERN
    # @param categories [Array] enclosing folders, outermost first
    # @raise [NoMethodError] when the line does not match PATTERN
    def initialize(dump_date, line, categories)
      @dump_date = dump_date
      m = line.match(PATTERN)
      @href = m[1].gsub(USELESS_SHARP, '') # drop the fragment
      @add_date = Time.at(m[2].to_i)
      @name = m[3]
      @categories = categories
    end

    # @return [String] "<add date> <folder path>> <name>"
    def inspect
      @inspect ||= "#{add_date.strftime('%F %T')} #{categories_str}> #{name}"
    end

    # @return [String] folder path, e.g. "/bar/dev" ("/" when there is none)
    def categories_str
      @categories_str ||= "/#{categories.join('/')}"
    end

    def to_s
      inspect
    end

    # Hash identity is the href alone.
    def hash
      href.hash
    end

    # @param another [#href]
    def eql?(another)
      href.eql?(another.href)
    end

    # Host part of the href.
    #
    # @return [String, nil] the host, nil when the URI has no host component,
    #   or 'unknown' when the href cannot be parsed as a URI
    def host
      # defined? rather than ||= so that a nil host is computed only once.
      return @host if defined?(@host)

      @host = begin
        URI.parse(href).host
      rescue URI::Error
        'unknown'
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Enumark
4
- VERSION = "0.1.2"
4
+ VERSION = "1.0.0"
5
5
  end
data/lib/enumark.rb CHANGED
@@ -1,115 +1,29 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "enumark/version"
4
+ require 'enumark/item'
5
+ require 'enumark/category'
6
+ require 'enumark/grouping'
7
+ require 'enumark/dir'
8
+ require 'enumark/config'
4
9
 
5
10
  class Enumark
6
11
  include Enumerable
7
12
 
8
- CATEGORY_START = /^\s.*<DT><H3/
9
- CATEGORY_END = /^\s.*<\/DL><p>/
10
- CATEGORY_NAME = /ADD_DATE="(.*?)".*LAST_MODIFIED="(.*?)".*>(.*)<\/H3/
11
-
12
- ITEM_PREFIX = /^\s.*<DT><A/
13
- ITEM_NAME = /HREF="(.*?)".*ADD_DATE="(.*?)".*>(.*)<\/A>/
14
-
15
- class Category
16
- attr_reader :name
17
- alias_method :inspect, :name
18
- alias_method :to_s, :name
19
-
20
- def initialize(line)
21
- m = line.match(CATEGORY_NAME)
22
- @add_date = m[1]
23
- @last_mod = m[2]
24
- @name = m[3]
25
- end
26
- end
27
-
28
- class Item
29
- attr_reader :name, :href, :categories
30
-
31
- USELESS_SHARP = /\#.*$/
32
-
33
- def initialize(line, categories)
34
- m = line.match(ITEM_NAME)
35
- @href = m[1].gsub(USELESS_SHARP, '')
36
- @add_date = m[2]
37
- @name = m[3]
38
- @categories = categories
39
- end
40
-
41
- def inspect
42
- @inspect ||= "/#{categories.join('/')}> #{name}"
43
- end
44
-
45
- def to_s
46
- inspect
47
- end
48
-
49
- def host
50
- @host ||= (URI.parse(href).host rescue 'unknown')
51
- end
52
- end
53
-
54
- class Hostname
55
- attr_reader :name, :items
56
-
57
- def initialize(name)
58
- @name = name
59
- @items = []
60
- end
61
-
62
- def add(item)
63
- @items << item
64
- end
65
-
66
- def inspect
67
- @name
68
- end
69
-
70
- def to
71
- inspect
72
- end
73
- end
74
-
75
- class Grouping
76
- Group = Struct.new(:name, :items)
77
-
78
- def initialize(enumark, key, &post)
79
- @lock = Mutex.new
80
- @collection = nil
81
-
82
- @enumark = enumark
83
- @key = key
84
- @post = post
85
- end
86
-
87
- def each(&block)
88
- unless @collection
89
- @lock.synchronize do
90
- @collection = @enumark.group_by(&@key)
91
- @collection = @post.call(@collection) if @post
92
- @collection = @collection.map{ |k, items| Group.new(k, items) }
93
- end
94
- end
95
-
96
- @collection.each(&block)
97
- end
98
- end
99
-
100
- def initialize(file)
13
+ def initialize(file, items: nil)
101
14
  @file = file
102
15
  @lock = Mutex.new
103
- @read = false
104
- @items = []
16
+ @items = items
105
17
 
106
18
  @hosts = Grouping.new(self, :host)
107
19
  @dup_titles = Grouping.new(self, :name){ |groups| groups.select{ |_, items| items.count > 1 } }
108
20
  @dup_hrefs = Grouping.new(self, :href){ |groups| groups.select{ |_, items| items.count > 1 } }
21
+ @cates = Grouping.new(self, :categories_str)
109
22
  end
110
23
 
111
24
  def each(&block)
112
25
  read_all_lines
26
+ sort_by_add_date!
113
27
  @items.each(&block)
114
28
  end
115
29
 
@@ -125,32 +39,55 @@ class Enumark
125
39
  @dup_hrefs.each(&block)
126
40
  end
127
41
 
42
+ def each_category(&block)
43
+ @cates.each(&block)
44
+ end
45
+
46
+ [:+ ,:-, :&, :|].each do |op|
47
+ class_eval <<-EOM
48
+ def #{op}(another)
49
+ new_items = self.to_a #{op} another.to_a
50
+ Enumark.new(nil, items: new_items)
51
+ end
52
+ EOM
53
+ end
54
+
128
55
  private
129
56
 
130
57
  def read_all_lines
131
- return if @read
58
+ return if @items
132
59
 
133
60
  @lock.synchronize do
134
- next if @read
135
-
136
- _read_all_lines
137
- @read = true
61
+ _read_all_lines unless @items
138
62
  end
139
63
  end
140
64
 
141
65
  def _read_all_lines
66
+ dump_date = Pathname.new(@file).basename('.html').to_s
142
67
  categories = []
68
+ @items = []
143
69
 
144
70
  File.new(@file).each do |line|
145
71
  case line
146
- when ITEM_PREFIX
147
- item = Item.new(line, categories.dup)
72
+ when Item::PREFIX
73
+ item = Item.new(dump_date, line, categories.dup)
148
74
  @items.push(item)
149
- when CATEGORY_START
75
+ when Category::START
150
76
  categories.push(Category.new(line))
151
- when CATEGORY_END
77
+ when Category::ENDIND
152
78
  categories.pop
153
79
  end
154
80
  end
155
81
  end
82
+
83
+ def sort_by_add_date!
84
+ return if @sorted
85
+
86
+ @lock.synchronize do
87
+ next if @sorted
88
+
89
+ @items.sort!{ |i1, i2| i2.add_date <=> i1.add_date }
90
+ @sorted = true
91
+ end
92
+ end
156
93
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: enumark
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ken
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-02-13 00:00:00.000000000 Z
11
+ date: 2022-03-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pry
@@ -32,6 +32,7 @@ extensions: []
32
32
  extra_rdoc_files: []
33
33
  files:
34
34
  - ".gitignore"
35
+ - ".ruby-version"
35
36
  - CODE_OF_CONDUCT.md
36
37
  - Gemfile
37
38
  - LICENSE.txt
@@ -41,6 +42,11 @@ files:
41
42
  - bin/setup
42
43
  - enumark.gemspec
43
44
  - lib/enumark.rb
45
+ - lib/enumark/category.rb
46
+ - lib/enumark/config.rb
47
+ - lib/enumark/dir.rb
48
+ - lib/enumark/grouping.rb
49
+ - lib/enumark/item.rb
44
50
  - lib/enumark/version.rb
45
51
  homepage: https://github.com/turnon/enumark
46
52
  licenses:
@@ -54,14 +60,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
54
60
  requirements:
55
61
  - - ">="
56
62
  - !ruby/object:Gem::Version
57
- version: 2.4.0
63
+ version: 3.1.0
58
64
  required_rubygems_version: !ruby/object:Gem::Requirement
59
65
  requirements:
60
66
  - - ">="
61
67
  - !ruby/object:Gem::Version
62
68
  version: '0'
63
69
  requirements: []
64
- rubygems_version: 3.2.3
70
+ rubygems_version: 3.3.7
65
71
  signing_key:
66
72
  specification_version: 4
67
73
  summary: Enumerate chrome bookmark dump file