linkarchivetools 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,356 @@
+ """
+ Provides information about an archive database.
+
+ Examples:
+ - What was said about Musk (title only)
+   $ --search "title=*Musk*"
+ - What was said about Musk (title, link, description, etc.)
+   $ --search "Musk"
+
+ TODO
+ - Output formats? (md)?
+ - Maybe it could produce a chart?
+
+ """
+
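+ # Example full invocations (a sketch; `analyzer` below is a placeholder for
+ # whatever console entry point the package exposes for this module):
+ #
+ #   analyzer --db archive.db --search "title=*Musk*" --title --tags
+ #   analyzer --db archive.db --summary --columns
+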
+ import argparse
+ import time
+ import os
+ import json
+ from sqlalchemy import create_engine
+
+ from .utils.omnisearch import SingleSymbolEvaluator, EquationEvaluator, OmniSearch
+ from .utils.alchemysearch import (
+     AlchemySymbolEvaluator,
+     AlchemyEquationEvaluator,
+     AlchemySearch,
+ )
+ from .utils.reflected import (
+     ReflectedTable,
+     ReflectedEntryTable,
+     ReflectedUserTags,
+     ReflectedSocialData,
+ )
+
+
+ def print_time_diff(start_time):
+     elapsed_time_seconds = time.time() - start_time
+     elapsed_minutes = int(elapsed_time_seconds // 60)
+     elapsed_seconds = int(elapsed_time_seconds % 60)
+     print(f"Time: {elapsed_minutes}:{elapsed_seconds:02d}")
+
+
+ class DisplayRowHandler(object):
+
+     def __init__(self, args=None, engine=None, connection=None):
+         self.args = args
+         self.start_time = time.time()
+         self.engine = engine
+         self.connection = connection
+
+         self.files = []
+
+         self.total_entries = 0
+         self.good_entries = 0
+         self.dead_entries = 0
+
+     def print_entry(self, entry):
+         level = self.args.verbosity
+         if level is None or level == 0:
+             return
+
+         if self.args.description:
+             print("---------------------")
+
+         # Build a single header line for the entry
+         text = "[{:03d}] {}".format(entry.page_rating_votes, entry.link)
+
+         if self.args.title:
+             if entry.title:
+                 text += " " + entry.title
+
+         if self.args.source:
+             source_id = entry.source
+             if source_id:
+                 r = ReflectedEntryTable(self.engine, self.connection)
+                 source = r.get_source(source_id)
+                 text += " [{}]".format(source.title)
+
+         print(text)
+
+         if self.args.date_published:
+             date_published = entry.date_published
+             if date_published:
+                 print(date_published)
+
+         if self.args.description:
+             description = entry.description
+             if description:
+                 print(description)
+
+         if self.args.tags:
+             tags_table = ReflectedUserTags(self.engine, self.connection)
+             tags = tags_table.get_tags_string(entry.id)
+             if tags and tags != "":
+                 self.print_tags(tags)
+
+         if self.args.social:
+             social_table = ReflectedSocialData(self.engine, self.connection)
+             social = social_table.get(entry.id)
+             if social is not None:
+                 self.print_social(social)
+
+         if self.args.status:
+             print(entry.status_code)
+
+     def print_tags(self, tags):
+         print(tags)
+
+     def print_social(self, social):
+         if (
+             social.view_count is not None
+             and social.thumbs_up is not None
+             and social.thumbs_down is not None
+         ):
+             print(
+                 f"V:{social.view_count} TU:{social.thumbs_up} TD:{social.thumbs_down}"
+             )
+         else:
+             if social.view_count:
+                 print(f"V:{social.view_count}")
+
+             if social.thumbs_up:
+                 print(f"TU:{social.thumbs_up}")
+
+             if social.thumbs_down:
+                 print(f"TD:{social.thumbs_down}")
+
+         if social.upvote_diff:
+             print(f"S:{social.upvote_diff}")
+
+         if social.upvote_ratio:
+             print(f"S:{social.upvote_ratio}")
+
+         if social.followers_count:
+             print(f"F:{social.followers_count}")
+
+         if social.stars:
+             print(f"S:{social.stars}")
+
+     def get_time_diff(self):
+         return time.time() - self.start_time
+
+     def handle_row(self, row):
+         """
+         The row is expected to provide attribute access, e.g. row.link
+         """
+         self.print_entry(row)
+
+         self.total_entries += 1
+
+     def summary(self):
+         if self.args.summary:
+             if self.args.verify:
+                 print(
+                     "total:{} good:{} dead:{}".format(
+                         self.total_entries, self.good_entries, self.dead_entries
+                     )
+                 )
+             else:
+                 print("total:{}".format(self.total_entries))
+
+
+ class YieldRowHandler(object):
+
+     def __init__(self, args=None, engine=None, connection=None):
+         self.args = args
+         self.start_time = time.time()
+         self.engine = engine
+         self.connection = connection
+
+         self.files = []
+
+         self.total_entries = 0
+         self.good_entries = 0
+         self.dead_entries = 0
+
+     def handle_row(self, row):
+         """
+         Yields the row instead of printing it, e.g. row.link
+         """
+         yield row
+
+
+ class DbAnalyzer(object):
+     def __init__(self, input_db, args=None):
+         self.args = args
+         self.result = None
+         self.engine = None
+         self.input_db = input_db
+
+     def print_summary(self, print_columns=False):
+         db = self.input_db
+
+         if not os.path.isfile(db):
+             print("File does not exist: {}".format(db))
+             return
+
+         self.engine = create_engine("sqlite:///" + db)
+         with self.engine.connect() as connection:
+             r = ReflectedTable(self.engine, connection)
+             r.print_summary(print_columns)
+
+     def search(self):
+         if self.is_db_scan():
+             file = self.input_db
+             if not os.path.isfile(file):
+                 print("File does not exist: {}".format(file))
+                 return
+
+             print("Creating engine")
+             self.engine = create_engine("sqlite:///" + self.input_db)
+             print("Creating engine DONE")
+
+             with self.engine.connect() as connection:
+                 self.connection = connection
+                 yield from self.perform_search()
+
+     def perform_search(self):
+         row_handler = DisplayRowHandler(
+             args=self.args, engine=self.engine, connection=self.connection
+         )
+
+         search = None
+         if self.args:
+             search = self.args.search
+
+         print("Starting alchemy")
+         searcher = AlchemySearch(
+             self.engine,
+             search,
+             row_handler=row_handler,
+             args=self.args,
+             connection=self.connection,
+         )
+         print("Starting alchemy DONE")
+
+         print("Searching...")
+         yield from searcher.search()
+
+     def get_entries(self):
+         if self.is_db_scan():
+             file = self.input_db
+             if not os.path.isfile(file):
+                 print("File does not exist: {}".format(file))
+                 return
+
+             print("Creating engine")
+             self.engine = create_engine("sqlite:///" + self.input_db)
+             print("Creating engine DONE")
+
+             with self.engine.connect() as connection:
+                 self.connection = connection
+                 yield from self.perform_get_entries()
+
+     def perform_get_entries(self):
+         row_handler = YieldRowHandler(
+             args=self.args, engine=self.engine, connection=self.connection
+         )
+
+         search = None
+         if self.args:
+             search = self.args.search
+
+         print("Starting alchemy")
+         searcher = AlchemySearch(
+             self.engine,
+             search,
+             row_handler=row_handler,
+             args=self.args,
+             connection=self.connection,
+         )
+         print("Starting alchemy DONE")
+
+         print("Searching...")
+         yield from searcher.search()
+
+     def is_db_scan(self):
+         if self.input_db:
+             return True
+
+         return False
+
+
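+ # Programmatic-use sketch (an assumption: AlchemySearch forwards whatever the
+ # row handler yields, and `args` is a namespace shaped like the one produced by
+ # Parser below):
+ #
+ #   analyzer = DbAnalyzer(input_db="archive.db", args=args)
+ #   for entry in analyzer.get_entries():
+ #       print(entry.link)
+
+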
+ class Parser(object):
+
+     def parse(self):
+         self.parser = argparse.ArgumentParser(description="Data analyzer program")
+         self.parser.add_argument("--db", help="DB to be scanned")
+
+         self.parser.add_argument(
+             "--search", help="Search expression, using the same syntax as the main program / site."
+         )
+         self.parser.add_argument(
+             "--order-by", default="page_rating_votes", help="order by column."
+         )
+         self.parser.add_argument("--asc", action="store_true", help="order ascending")
+         self.parser.add_argument("--desc", action="store_true", help="order descending")
+         self.parser.add_argument("--table", default="linkdatamodel", help="Table name")
+
+         self.parser.add_argument("--title", action="store_true", help="displays title")
+         self.parser.add_argument(
+             "--description", action="store_true", help="displays description"
+         )
+         self.parser.add_argument(
+             "--status", action="store_true", help="displays status"
+         )
+         self.parser.add_argument("--tags", action="store_true", help="displays tags")
+         self.parser.add_argument(
+             "--social", action="store_true", help="displays social data"
+         )
+         self.parser.add_argument(
+             "--date-published", action="store_true", help="displays date-published"
+         )
+         self.parser.add_argument(
+             "--source", action="store_true", help="displays source"
+         )
+
+         self.parser.add_argument(
+             "--summary", action="store_true", help="displays summary of tables"
+         )
+         self.parser.add_argument(
+             "--columns",
+             action="store_true",
+             help="displays summary of table column names",
+         )
+
+         self.parser.add_argument(
+             "-i", "--ignore-case", action="store_true", help="Ignores case"
+         )
+         self.parser.add_argument(
+             "-v", "--verbosity", type=int, default=1, help="Verbosity level"
+         )
+
+         self.args = self.parser.parse_args()
+
+         return True
+
+
+ def main():
+     p = Parser()
+     if not p.parse():
+         print("Could not parse options")
+         return
+
+     start_time = time.time()
+
+     m = DbAnalyzer(input_db=p.args.db, args=p.args)
+     if p.args.summary:
+         m.print_summary(p.args.columns)
+     else:
+         # search() is a generator, so it has to be consumed for rows to be printed
+         for _ in m.search():
+             pass
+
+     print_time_diff(start_time)
+
+
+ if __name__ == "__main__":
+     main()
@@ -0,0 +1,154 @@
+ """
+ Filters out redundant data from a database.
+ Normally, for views and analysis, you do not need the temporary tables.
+ """
+
+ import os
+ import sys
+ import json
+ import time
+ import shutil
+ from pathlib import Path
+ import argparse
+
+ from sqlalchemy import create_engine
+ from .utils.reflected import *
+
+
+ class DbFilter(object):
+     """
+     Filter class
+     """
+
+     def __init__(self, input_db, output_db):
+         self.input_db = input_db
+         self.output_db = output_db
+         self.engine = None
+         self.connection = None
+         self.setup()
+
+     def setup(self):
+         path = Path(self.input_db)
+         if not path.exists():
+             print("File {} does not exist".format(path))
+             return
+
+         new_path = Path(self.output_db)
+         if new_path.exists():
+             new_path.unlink()
+
+         shutil.copy(self.input_db, self.output_db)
+
+         self.engine = create_engine(f"sqlite:///{self.output_db}")
+         self.connection = self.engine.connect()
+
+     def is_valid(self) -> bool:
+         if not self.engine:
+             return False
+         return True
+
+     def close(self):
+         if self.connection:
+             self.connection.close()
+             self.connection = None
+
+     def truncate(self):
+         table = ReflectedEntryTable(self.engine, self.connection)
+
+         table.truncate_table("userentrytransitionhistory")
+         table.truncate_table("userentryvisithistory")
+         table.truncate_table("usersearchhistory")
+         table.truncate_table("uservotes")
+         table.truncate_table("usercompactedtags")
+         table.truncate_table("usercomments")
+         table.truncate_table("userbookmarks")
+         table.truncate_table("user")
+         table.truncate_table("userconfig")
+         table.truncate_table("sourcedatamodel")
+         table.truncate_table("sourcecategories")
+         table.truncate_table("sourcesubcategories")
+         table.truncate_table("readlater")
+         table.truncate_table("modelfiles")
+         table.truncate_table("gateway")
+         table.truncate_table("entryrules")
+         table.truncate_table("domains")
+         table.truncate_table("dataexport")
+         table.truncate_table("configurationentry")
+         table.truncate_table("compactedtags")
+         table.truncate_table("blockentrylist")
+
+     def filter(self, conditions):
+         table = ReflectedEntryTable(self.engine, self.connection)
+
+         sql_text = f"DELETE FROM linkdatamodel WHERE {conditions};"
+         # TODO delete dependent rows as well
+         table.run_sql(sql_text)
+         table.vacuum()
+         table.close()
+
+     def filter_bookmarks(self):
+         table = ReflectedEntryTable(self.engine, self.connection)
+
+         sql_text = "DELETE FROM linkdatamodel WHERE bookmarked=False;"
+         # TODO delete dependent rows as well
+         table.run_sql(sql_text)
+         table.vacuum()
+         table.close()
+
+     def filter_votes(self):
+         table = ReflectedEntryTable(self.engine, self.connection)
+
+         sql_text = "DELETE FROM linkdatamodel WHERE page_rating_votes=0;"
+         table.run_sql(sql_text)
+         table.vacuum()
+         table.close()
+
+     def filter_redundant(self):
+         """
+         Entries that are not bookmarked AND have no votes are redundant
+         """
+         table = ReflectedEntryTable(self.engine, self.connection)
+
+         sql_text = "DELETE FROM linkdatamodel WHERE bookmarked=False AND page_rating_votes=0;"
+         table.run_sql(sql_text)
+         table.vacuum()
+         table.close()
+
+     def vacuum(self):
+         table = ReflectedEntryTable(self.engine, self.connection)
+         table.vacuum()
+         table.close()
+
+
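+ # Programmatic-use sketch (relies only on the methods defined above):
+ #
+ #   f = DbFilter("places.db", "new.db")
+ #   if f.is_valid():
+ #       f.truncate()
+ #       f.filter_redundant()
+ #       f.close()
+
+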
+ def parse():
+     parser = argparse.ArgumentParser(description="Data filter program")
+     parser.add_argument("--db", default="places.db", help="DB to be scanned")
+     parser.add_argument("--output-db", default="new.db", help="DB to be created")
+     parser.add_argument("--bookmarked", action="store_true", help="keep only bookmarked entries")
+     parser.add_argument("--votes", action="store_true", help="keep only entries with votes > 0")
+     parser.add_argument("--clean", action="store_true", help="cleans db from temporary tables")
+     parser.add_argument("-v", "--verbosity", help="Verbosity level")
+
+     args = parser.parse_args()
+
+     return parser, args
+
+
+ def main():
+     start_time = time.time()
+     parser, args = parse()
+
+     thefilter = DbFilter(args.db, args.output_db)
+     if not thefilter.is_valid():
+         return
+
+     thefilter.truncate()
+     if args.bookmarked:
+         thefilter.filter_bookmarks()
+     if args.votes:
+         thefilter.filter_votes()
+
+     thefilter.vacuum()
+     thefilter.close()
+
+     end_time = time.time()
+     print(f"Done in {end_time - start_time:.2f}s")
+
+
+ if __name__ == "__main__":
+     main()
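+
+ # Example invocation (a sketch; `dbfilter` is a placeholder for whatever entry
+ # point the package exposes for this module):
+ #
+ #   dbfilter --db places.db --output-db new.db --bookmarked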
@@ -0,0 +1,82 @@
+ import shutil
+ import os
+ from pathlib import Path
+ from sqlalchemy import create_engine
+
+ from .utils.reflected import *
+
+
+ class DbMerge(object):
+     """
+     Merges two databases into one output database.
+     """
+
+     def __init__(
+         self,
+         input_dbs=None,
+         output_db=None,
+         verbose=True,
+     ):
+         """
+         Constructor
+         @param input_dbs List of input database files to merge
+         @param output_db Path of the merged output database
+         @param verbose If true, progress information is printed
+         """
+         self.input_dbs = input_dbs
+         self.output_db = output_db
+         self.verbose = verbose
+
+     def convert(self):
+         """
+         Public API: merges the input databases into the output database
+         """
+         input_dbs = self.input_dbs
+         if not input_dbs or len(input_dbs) < 2:
+             return False
+
+         size_zero = os.path.getsize(input_dbs[0])
+         size_one = os.path.getsize(input_dbs[1])
+
+         # The bigger database is copied to the output location; entries from
+         # the smaller database are then merged into it.
+         if size_zero > size_one:
+             bigger_db = input_dbs[0]
+             smaller_db = input_dbs[1]
+         else:
+             bigger_db = input_dbs[1]
+             smaller_db = input_dbs[0]
+
+         dst = Path(self.output_db)
+
+         if dst.exists():
+             dst.unlink()
+
+         shutil.copy(bigger_db, self.output_db)
+
+         self.src_engine = create_engine(f"sqlite:///{smaller_db}")
+         self.dst_engine = create_engine(f"sqlite:///{self.output_db}")
+
+         with self.src_engine.connect() as connection:
+             self.src_connection = connection
+             with self.dst_engine.connect() as dst_connection:
+                 self.dst_connection = dst_connection
+                 self.convert_entries()
+
+     def convert_entries(self):
+         src_table = ReflectedEntryTable(self.src_engine, self.src_connection)
+         for entry in src_table.get_entries_good():
+             dst_table = ReflectedEntryTable(self.dst_engine, self.dst_connection)
+             if not dst_table.exists(link=entry.link):
+                 self.convert_entry(entry)
+             elif self.verbose:
+                 print(f"Entry {entry.link} is already present")
+
+     def convert_entry(self, entry):
+         if self.verbose:
+             print(f"Converting entry {entry.link}")
+
+         copier = EntryCopier(
+             src_engine=self.src_engine,
+             src_connection=self.src_connection,
+             dst_engine=self.dst_engine,
+             dst_connection=self.dst_connection,
+         )
+         copier.copy_entry(entry)
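+
+
+ # Minimal usage sketch (an assumption: both inputs were produced by the same
+ # schema version of the application):
+ #
+ #   merger = DbMerge(input_dbs=["archive_a.db", "archive_b.db"], output_db="merged.db")
+ #   merger.convert()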