linkarchivetools 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,356 @@
+ """
+ Provides information about an archive database.
+
+ Examples:
+ - What was said about Musk (title only)
+   $ --search "title=*Musk*"
+ - What was said about Musk (title, link, description, etc.)
+   $ --search "Musk"
+
+ TODO
+ - Output formats? (md)?
+ - Maybe it could produce a chart?
+
+ """
+
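+ # Example full invocations (a sketch; `analyzer` below is a placeholder for
+ # whatever console entry point the package exposes for this module):
+ #
+ #   analyzer --db archive.db --search "title=*Musk*" --title --tags
+ #   analyzer --db archive.db --summary --columns
+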
+ import argparse
+ import time
+ import os
+ import json
+ from sqlalchemy import create_engine
+
+ from .utils.omnisearch import SingleSymbolEvaluator, EquationEvaluator, OmniSearch
+ from .utils.alchemysearch import (
+     AlchemySymbolEvaluator,
+     AlchemyEquationEvaluator,
+     AlchemySearch,
+ )
+ from .utils.reflected import (
+     ReflectedTable,
+     ReflectedEntryTable,
+     ReflectedUserTags,
+     ReflectedSocialData,
+ )
+
+
+ def print_time_diff(start_time):
+     elapsed_time_seconds = time.time() - start_time
+     elapsed_minutes = int(elapsed_time_seconds // 60)
+     elapsed_seconds = int(elapsed_time_seconds % 60)
+     print(f"Time: {elapsed_minutes}:{elapsed_seconds:02d}")
+
+
+ class DisplayRowHandler(object):
+
+     def __init__(self, args=None, engine=None, connection=None):
+         self.args = args
+         self.start_time = time.time()
+         self.engine = engine
+         self.connection = connection
+
+         self.files = []
+
+         self.total_entries = 0
+         self.good_entries = 0
+         self.dead_entries = 0
+
+     def print_entry(self, entry):
+         level = self.args.verbosity
+         if level is None or level == 0:
+             return
+
+         if self.args.description:
+             print("---------------------")
+
+         # Build a single header line for the entry
+         text = "[{:03d}] {}".format(entry.page_rating_votes, entry.link)
+
+         if self.args.title:
+             if entry.title:
+                 text += " " + entry.title
+
+         if self.args.source:
+             source_id = entry.source
+             if source_id:
+                 r = ReflectedEntryTable(self.engine, self.connection)
+                 source = r.get_source(source_id)
+                 text += " [{}]".format(source.title)
+
+         print(text)
+
+         if self.args.date_published:
+             date_published = entry.date_published
+             if date_published:
+                 print(date_published)
+
+         if self.args.description:
+             description = entry.description
+             if description:
+                 print(description)
+
+         if self.args.tags:
+             tags_table = ReflectedUserTags(self.engine, self.connection)
+             tags = tags_table.get_tags_string(entry.id)
+             if tags and tags != "":
+                 self.print_tags(tags)
+
+         if self.args.social:
+             social_table = ReflectedSocialData(self.engine, self.connection)
+             social = social_table.get(entry.id)
+             if social is not None:
+                 self.print_social(social)
+
+         if self.args.status:
+             print(entry.status_code)
+
+     def print_tags(self, tags):
+         print(tags)
+
+     def print_social(self, social):
+         if (
+             social.view_count is not None
+             and social.thumbs_up is not None
+             and social.thumbs_down is not None
+         ):
+             print(
+                 f"V:{social.view_count} TU:{social.thumbs_up} TD:{social.thumbs_down}"
+             )
+         else:
+             if social.view_count:
+                 print(f"V:{social.view_count}")
+
+             if social.thumbs_up:
+                 print(f"TU:{social.thumbs_up}")
+
+             if social.thumbs_down:
+                 print(f"TD:{social.thumbs_down}")
+
+         if social.upvote_diff:
+             print(f"S:{social.upvote_diff}")
+
+         if social.upvote_ratio:
+             print(f"S:{social.upvote_ratio}")
+
+         if social.followers_count:
+             print(f"F:{social.followers_count}")
+
+         if social.stars:
+             print(f"S:{social.stars}")
+
+     def get_time_diff(self):
+         return time.time() - self.start_time
+
+     def handle_row(self, row):
+         """
+         The row is expected to provide attribute access, e.g. row.link
+         """
+         self.print_entry(row)
+
+         self.total_entries += 1
+
+     def summary(self):
+         if self.args.summary:
+             if self.args.verify:
+                 print(
+                     "total:{} good:{} dead:{}".format(
+                         self.total_entries, self.good_entries, self.dead_entries
+                     )
+                 )
+             else:
+                 print("total:{}".format(self.total_entries))
+
+
+ class YieldRowHandler(object):
+
+     def __init__(self, args=None, engine=None, connection=None):
+         self.args = args
+         self.start_time = time.time()
+         self.engine = engine
+         self.connection = connection
+
+         self.files = []
+
+         self.total_entries = 0
+         self.good_entries = 0
+         self.dead_entries = 0
+
+     def handle_row(self, row):
+         """
+         Yields the row instead of printing it, e.g. row.link
+         """
+         yield row
+
+
+ class DbAnalyzer(object):
+     def __init__(self, input_db, args=None):
+         self.args = args
+         self.result = None
+         self.engine = None
+         self.input_db = input_db
+
+     def print_summary(self, print_columns=False):
+         db = self.input_db
+
+         if not os.path.isfile(db):
+             print("File does not exist: {}".format(db))
+             return
+
+         self.engine = create_engine("sqlite:///" + db)
+         with self.engine.connect() as connection:
+             r = ReflectedTable(self.engine, connection)
+             r.print_summary(print_columns)
+
+     def search(self):
+         if self.is_db_scan():
+             file = self.input_db
+             if not os.path.isfile(file):
+                 print("File does not exist: {}".format(file))
+                 return
+
+             print("Creating engine")
+             self.engine = create_engine("sqlite:///" + self.input_db)
+             print("Creating engine DONE")
+
+             with self.engine.connect() as connection:
+                 self.connection = connection
+                 yield from self.perform_search()
+
+     def perform_search(self):
+         row_handler = DisplayRowHandler(
+             args=self.args, engine=self.engine, connection=self.connection
+         )
+
+         search = None
+         if self.args:
+             search = self.args.search
+
+         print("Starting alchemy")
+         searcher = AlchemySearch(
+             self.engine,
+             search,
+             row_handler=row_handler,
+             args=self.args,
+             connection=self.connection,
+         )
+         print("Starting alchemy DONE")
+
+         print("Searching...")
+         yield from searcher.search()
+
+     def get_entries(self):
+         if self.is_db_scan():
+             file = self.input_db
+             if not os.path.isfile(file):
+                 print("File does not exist: {}".format(file))
+                 return
+
+             print("Creating engine")
+             self.engine = create_engine("sqlite:///" + self.input_db)
+             print("Creating engine DONE")
+
+             with self.engine.connect() as connection:
+                 self.connection = connection
+                 yield from self.perform_get_entries()
+
+     def perform_get_entries(self):
+         row_handler = YieldRowHandler(
+             args=self.args, engine=self.engine, connection=self.connection
+         )
+
+         search = None
+         if self.args:
+             search = self.args.search
+
+         print("Starting alchemy")
+         searcher = AlchemySearch(
+             self.engine,
+             search,
+             row_handler=row_handler,
+             args=self.args,
+             connection=self.connection,
+         )
+         print("Starting alchemy DONE")
+
+         print("Searching...")
+         yield from searcher.search()
+
+     def is_db_scan(self):
+         if self.input_db:
+             return True
+
+         return False
+
+
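+ # Programmatic-use sketch (an assumption: AlchemySearch forwards whatever the
+ # row handler yields, and `args` is a namespace shaped like the one produced by
+ # Parser below):
+ #
+ #   analyzer = DbAnalyzer(input_db="archive.db", args=args)
+ #   for entry in analyzer.get_entries():
+ #       print(entry.link)
+
+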
+ class Parser(object):
+
+     def parse(self):
+         self.parser = argparse.ArgumentParser(description="Data analyzer program")
+         self.parser.add_argument("--db", help="DB to be scanned")
+
+         self.parser.add_argument(
+             "--search", help="Search expression, using the same syntax as the main program / site."
+         )
+         self.parser.add_argument(
+             "--order-by", default="page_rating_votes", help="order by column."
+         )
+         self.parser.add_argument("--asc", action="store_true", help="order ascending")
+         self.parser.add_argument("--desc", action="store_true", help="order descending")
+         self.parser.add_argument("--table", default="linkdatamodel", help="Table name")
+
+         self.parser.add_argument("--title", action="store_true", help="displays title")
+         self.parser.add_argument(
+             "--description", action="store_true", help="displays description"
+         )
+         self.parser.add_argument(
+             "--status", action="store_true", help="displays status"
+         )
+         self.parser.add_argument("--tags", action="store_true", help="displays tags")
+         self.parser.add_argument(
+             "--social", action="store_true", help="displays social data"
+         )
+         self.parser.add_argument(
+             "--date-published", action="store_true", help="displays date-published"
+         )
+         self.parser.add_argument(
+             "--source", action="store_true", help="displays source"
+         )
+
+         self.parser.add_argument(
+             "--summary", action="store_true", help="displays summary of tables"
+         )
+         self.parser.add_argument(
+             "--columns",
+             action="store_true",
+             help="displays summary of table column names",
+         )
+
+         self.parser.add_argument(
+             "-i", "--ignore-case", action="store_true", help="Ignores case"
+         )
+         self.parser.add_argument(
+             "-v", "--verbosity", type=int, default=1, help="Verbosity level"
+         )
+
+         self.args = self.parser.parse_args()
+
+         return True
+
+
+ def main():
+     p = Parser()
+     if not p.parse():
+         print("Could not parse options")
+         return
+
+     start_time = time.time()
+
+     m = DbAnalyzer(input_db=p.args.db, args=p.args)
+     if p.args.summary:
+         m.print_summary(p.args.columns)
+     else:
+         # search() is a generator, so it has to be consumed for rows to be printed
+         for _ in m.search():
+             pass
+
+     print_time_diff(start_time)
+
+
+ if __name__ == "__main__":
+     main()
@@ -0,0 +1,154 @@
+ """
+ Filters out redundant data from a database.
+ Normally, for views and analysis, you do not need the temporary tables.
+ """
+
+ import os
+ import sys
+ import json
+ import time
+ import shutil
+ from pathlib import Path
+ import argparse
+
+ from sqlalchemy import create_engine
+ from .utils.reflected import *
+
+
+ class DbFilter(object):
+     """
+     Filter class
+     """
+
+     def __init__(self, input_db, output_db):
+         self.input_db = input_db
+         self.output_db = output_db
+         self.engine = None
+         self.connection = None
+         self.setup()
+
+     def setup(self):
+         path = Path(self.input_db)
+         if not path.exists():
+             print("File {} does not exist".format(path))
+             return
+
+         new_path = Path(self.output_db)
+         if new_path.exists():
+             new_path.unlink()
+
+         shutil.copy(self.input_db, self.output_db)
+
+         self.engine = create_engine(f"sqlite:///{self.output_db}")
+         self.connection = self.engine.connect()
+
+     def is_valid(self) -> bool:
+         if not self.engine:
+             return False
+         return True
+
+     def close(self):
+         if self.connection:
+             self.connection.close()
+             self.connection = None
+
+     def truncate(self):
+         table = ReflectedEntryTable(self.engine, self.connection)
+
+         table.truncate_table("userentrytransitionhistory")
+         table.truncate_table("userentryvisithistory")
+         table.truncate_table("usersearchhistory")
+         table.truncate_table("uservotes")
+         table.truncate_table("usercompactedtags")
+         table.truncate_table("usercomments")
+         table.truncate_table("userbookmarks")
+         table.truncate_table("user")
+         table.truncate_table("userconfig")
+         table.truncate_table("sourcedatamodel")
+         table.truncate_table("sourcecategories")
+         table.truncate_table("sourcesubcategories")
+         table.truncate_table("readlater")
+         table.truncate_table("modelfiles")
+         table.truncate_table("gateway")
+         table.truncate_table("entryrules")
+         table.truncate_table("domains")
+         table.truncate_table("dataexport")
+         table.truncate_table("configurationentry")
+         table.truncate_table("compactedtags")
+         table.truncate_table("blockentrylist")
+
+     def filter(self, conditions):
+         table = ReflectedEntryTable(self.engine, self.connection)
+
+         sql_text = f"DELETE FROM linkdatamodel WHERE {conditions};"
+         # TODO delete dependent rows as well
+         table.run_sql(sql_text)
+         table.vacuum()
+         table.close()
+
+     def filter_bookmarks(self):
+         table = ReflectedEntryTable(self.engine, self.connection)
+
+         sql_text = "DELETE FROM linkdatamodel WHERE bookmarked=False;"
+         # TODO delete dependent rows as well
+         table.run_sql(sql_text)
+         table.vacuum()
+         table.close()
+
+     def filter_votes(self):
+         table = ReflectedEntryTable(self.engine, self.connection)
+
+         sql_text = "DELETE FROM linkdatamodel WHERE page_rating_votes=0;"
+         table.run_sql(sql_text)
+         table.vacuum()
+         table.close()
+
+     def filter_redundant(self):
+         """
+         Entries that are not bookmarked AND have no votes are redundant
+         """
+         table = ReflectedEntryTable(self.engine, self.connection)
+
+         sql_text = "DELETE FROM linkdatamodel WHERE bookmarked=False AND page_rating_votes=0;"
+         table.run_sql(sql_text)
+         table.vacuum()
+         table.close()
+
+     def vacuum(self):
+         table = ReflectedEntryTable(self.engine, self.connection)
+         table.vacuum()
+         table.close()
+
+
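+ # Programmatic-use sketch (relies only on the methods defined above):
+ #
+ #   f = DbFilter("places.db", "new.db")
+ #   if f.is_valid():
+ #       f.truncate()
+ #       f.filter_redundant()
+ #       f.close()
+
+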
+ def parse():
+     parser = argparse.ArgumentParser(description="Data filter program")
+     parser.add_argument("--db", default="places.db", help="DB to be scanned")
+     parser.add_argument("--output-db", default="new.db", help="DB to be created")
+     parser.add_argument("--bookmarked", action="store_true", help="keep only bookmarked entries")
+     parser.add_argument("--votes", action="store_true", help="keep only entries with votes > 0")
+     parser.add_argument("--clean", action="store_true", help="cleans db from temporary tables")
+     parser.add_argument("-v", "--verbosity", help="Verbosity level")
+
+     args = parser.parse_args()
+
+     return parser, args
+
+
+ def main():
+     start_time = time.time()
+     parser, args = parse()
+
+     thefilter = DbFilter(args.db, args.output_db)
+     if not thefilter.is_valid():
+         return
+
+     thefilter.truncate()
+     if args.bookmarked:
+         thefilter.filter_bookmarks()
+     if args.votes:
+         thefilter.filter_votes()
+
+     thefilter.vacuum()
+     thefilter.close()
+
+     end_time = time.time()
+     print(f"Done in {end_time - start_time:.2f}s")
+
+
+ if __name__ == "__main__":
+     main()
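+
+ # Example invocation (a sketch; `dbfilter` is a placeholder for whatever entry
+ # point the package exposes for this module):
+ #
+ #   dbfilter --db places.db --output-db new.db --bookmarked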
@@ -0,0 +1,82 @@
+ import shutil
+ import os
+ from pathlib import Path
+ from sqlalchemy import create_engine
+
+ from .utils.reflected import *
+
+
+ class DbMerge(object):
+     """
+     Merges two databases into one output database.
+     """
+
+     def __init__(
+         self,
+         input_dbs=None,
+         output_db=None,
+         verbose=True,
+     ):
+         """
+         Constructor
+         @param input_dbs List of input database files to merge
+         @param output_db Path of the merged output database
+         @param verbose If true, progress information is printed
+         """
+         self.input_dbs = input_dbs
+         self.output_db = output_db
+         self.verbose = verbose
+
+     def convert(self):
+         """
+         Public API: merges the input databases into the output database
+         """
+         input_dbs = self.input_dbs
+         if not input_dbs or len(input_dbs) < 2:
+             return False
+
+         size_zero = os.path.getsize(input_dbs[0])
+         size_one = os.path.getsize(input_dbs[1])
+
+         # The bigger database is copied to the output location; entries from
+         # the smaller database are then merged into it.
+         if size_zero > size_one:
+             bigger_db = input_dbs[0]
+             smaller_db = input_dbs[1]
+         else:
+             bigger_db = input_dbs[1]
+             smaller_db = input_dbs[0]
+
+         dst = Path(self.output_db)
+
+         if dst.exists():
+             dst.unlink()
+
+         shutil.copy(bigger_db, self.output_db)
+
+         self.src_engine = create_engine(f"sqlite:///{smaller_db}")
+         self.dst_engine = create_engine(f"sqlite:///{self.output_db}")
+
+         with self.src_engine.connect() as connection:
+             self.src_connection = connection
+             with self.dst_engine.connect() as dst_connection:
+                 self.dst_connection = dst_connection
+                 self.convert_entries()
+
+     def convert_entries(self):
+         src_table = ReflectedEntryTable(self.src_engine, self.src_connection)
+         for entry in src_table.get_entries_good():
+             dst_table = ReflectedEntryTable(self.dst_engine, self.dst_connection)
+             if not dst_table.exists(link=entry.link):
+                 self.convert_entry(entry)
+             elif self.verbose:
+                 print(f"Entry {entry.link} is already present")
+
+     def convert_entry(self, entry):
+         if self.verbose:
+             print(f"Converting entry {entry.link}")
+
+         copier = EntryCopier(
+             src_engine=self.src_engine,
+             src_connection=self.src_connection,
+             dst_engine=self.dst_engine,
+             dst_connection=self.dst_connection,
+         )
+         copier.copy_entry(entry)
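+
+
+ # Minimal usage sketch (an assumption: both inputs were produced by the same
+ # schema version of the application):
+ #
+ #   merger = DbMerge(input_dbs=["archive_a.db", "archive_b.db"], output_db="merged.db")
+ #   merger.convert()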