promnesia 1.1.20230129__py3-none-any.whl → 1.2.20240810__py3-none-any.whl
This diff shows the changes between two package versions as they were published to a supported public registry. It is provided for informational purposes only.
- promnesia/__main__.py +58 -50
- promnesia/cannon.py +4 -4
- promnesia/common.py +57 -38
- promnesia/compare.py +3 -2
- promnesia/compat.py +6 -65
- promnesia/config.py +4 -2
- promnesia/database/common.py +66 -0
- promnesia/database/dump.py +187 -0
- promnesia/{read_db.py → database/load.py} +10 -11
- promnesia/extract.py +1 -0
- promnesia/kjson.py +1 -1
- promnesia/logging.py +14 -14
- promnesia/misc/__init__.pyi +0 -0
- promnesia/misc/config_example.py +1 -2
- promnesia/misc/install_server.py +5 -4
- promnesia/server.py +24 -24
- promnesia/sources/__init__.pyi +0 -0
- promnesia/sources/auto.py +12 -7
- promnesia/sources/browser.py +80 -293
- promnesia/sources/browser_legacy.py +298 -0
- promnesia/sources/demo.py +18 -2
- promnesia/sources/filetypes.py +8 -0
- promnesia/sources/github.py +2 -2
- promnesia/sources/hackernews.py +1 -2
- promnesia/sources/hypothesis.py +1 -1
- promnesia/sources/markdown.py +15 -15
- promnesia/sources/org.py +7 -3
- promnesia/sources/plaintext.py +3 -1
- promnesia/sources/reddit.py +2 -2
- promnesia/sources/rss.py +5 -1
- promnesia/sources/shellcmd.py +6 -2
- promnesia/sources/signal.py +29 -20
- promnesia/sources/smscalls.py +8 -1
- promnesia/sources/stackexchange.py +2 -2
- promnesia/sources/takeout.py +132 -12
- promnesia/sources/takeout_legacy.py +10 -2
- promnesia/sources/telegram.py +79 -123
- promnesia/sources/telegram_legacy.py +117 -0
- promnesia/sources/vcs.py +1 -1
- promnesia/sources/viber.py +6 -15
- promnesia/sources/website.py +1 -1
- promnesia/sqlite.py +42 -0
- promnesia/tests/__init__.py +0 -0
- promnesia/tests/common.py +137 -0
- promnesia/tests/server_helper.py +64 -0
- promnesia/tests/sources/__init__.py +0 -0
- promnesia/tests/sources/test_auto.py +66 -0
- promnesia/tests/sources/test_filetypes.py +42 -0
- promnesia/tests/sources/test_hypothesis.py +39 -0
- promnesia/tests/sources/test_org.py +65 -0
- promnesia/tests/sources/test_plaintext.py +26 -0
- promnesia/tests/sources/test_shellcmd.py +22 -0
- promnesia/tests/sources/test_takeout.py +58 -0
- promnesia/tests/test_cannon.py +325 -0
- promnesia/tests/test_cli.py +42 -0
- promnesia/tests/test_compare.py +30 -0
- promnesia/tests/test_config.py +290 -0
- promnesia/tests/test_db_dump.py +223 -0
- promnesia/tests/test_extract.py +61 -0
- promnesia/tests/test_extract_urls.py +43 -0
- promnesia/tests/test_indexer.py +245 -0
- promnesia/tests/test_server.py +292 -0
- promnesia/tests/test_traverse.py +41 -0
- promnesia/tests/utils.py +35 -0
- {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/METADATA +14 -19
- promnesia-1.2.20240810.dist-info/RECORD +83 -0
- {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/WHEEL +1 -1
- {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/entry_points.txt +0 -1
- promnesia/dump.py +0 -105
- promnesia-1.1.20230129.dist-info/RECORD +0 -55
- {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/LICENSE +0 -0
- {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/top_level.txt +0 -0
promnesia/sources/auto.py
CHANGED
```diff
@@ -1,6 +1,9 @@
 """
 - discovers files recursively
 - guesses the format (orgmode/markdown/json/etc) by the extension/MIME type
+- can index most of plaintext files, including source code!
+- autodetects Obsidian vault and adds `obsidian://` app protocol support [[file:../src/promnesia/sources/obsidian.py][promnesia.sources.obsidian]]
+- autodetects Logseq graph and adds `logseq://` app protocol support [[file:../src/promnesia/sources/logseq.py][promnesia.sources.logseq]]
 """
 
 import csv
@@ -19,17 +22,18 @@ import warnings
 import pytz
 
 from ..common import Visit, Url, PathIsh, get_logger, Loc, get_tmpdir, extract_urls, Extraction, Result, Results, mime, traverse, file_mtime, echain, logger
+from ..common import warn_once
 from ..config import use_cores
 
 
-from .filetypes import EUrl
+from .filetypes import EUrl, Ctx
 from .auto_obsidian import obsidian_replacer
 from .auto_logseq import logseq_replacer
 
 
 def _collect(thing, path: List[str], result: List[EUrl]) -> None:
     if isinstance(thing, str):
-        ctx: Ctx = tuple(path)
+        ctx: Ctx = tuple(path)
         result.extend([EUrl(url=u, ctx=ctx) for u in extract_urls(thing)])
     elif isinstance(thing, list):
         path.append('[]')
@@ -164,7 +168,7 @@ for t in CODE:
 Replacer = Optional[Callable[[str, str], str]]
 
 def index(
-        *paths:
+        *paths: PathIsh,
         ignored: Union[Sequence[str], str]=(),
         follow: bool=True,
         replacer: Replacer=None,
@@ -279,6 +283,8 @@ def by_path(pp: Path) -> Tuple[Optional[Ex], Optional[Mime]]:
 
 def _index_file(pp: Path, opts: Options) -> Results:
     logger = get_logger()
+    # TODO need to keep debug logs here...
+    # logger.info(f"indexing {pp}")
     # TODO use kompress?
     # TODO not even sure if it's used...
     suf = pp.suffix.lower()
@@ -304,10 +310,9 @@ def _index_file(pp: Path, opts: Options) -> Results:
 
     ip, pm = by_path(pp)
     if ip is None:
-        #
-        # TODO only log once? # hmm..
+        # todo not really sure about using warnings vs yielding error here?
         msg = f'No extractor for suffix {suf}, mime {pm}'
-
+        warn_once(msg)
         yield echain(ex, RuntimeError(msg))
         return
 
@@ -315,7 +320,7 @@ def _index_file(pp: Path, opts: Options) -> Results:
 
     def indexer() -> Union[Urls, Results]:
         # eh, annoying.. need to make more generic..
-        idx = ip(pp)
+        idx = ip(pp)
         try:
             yield from idx
         except Exception as e:
```
promnesia/sources/browser.py
CHANGED
```diff
@@ -1,302 +1,89 @@
-
-from
-
-import sqlite3
-from typing import List, Set
+'''
+Uses [[https://github.com/karlicoss/HPI][HPI]] for visits from web browsers.
+'''
 
-import
+import re
+from typing import Optional, Iterator, Any, TYPE_CHECKING
+import warnings
 
-from
-from .. import config
+from promnesia.common import Results, Visit, Loc, Second, PathIsh, logger, is_sqlite_db
 
-# todo mcachew?
-from cachew import cachew
 
-
+def index(p: Optional[PathIsh]=None) -> Results:
+    from . import hpi
 
-
-
-
-    assert pp.exists(), pp  # just in case of broken symlinks
-
-    # is_file check because it also returns dirs
-    # TODO hmm, not sure what I meant here -- which dirs? behind symlinks?
-    is_db = lambda x: x.is_file() and mime(x) in {
-        'application/x-sqlite3',
-        'application/vnd.sqlite3',
-        # TODO this mime can also match wal files/journals, not sure
-    }
-
-    # todo warn if filtered out too many?
-    # todo wonder how quickly mimes can be computed?
-    # todo ugh, dunno, maybe this really belongs to hpi?? need get_files etc...
-    dbs = [p for p in sorted(pp.rglob('*')) if is_db(p)]
-
-    assert len(dbs) > 0, pp
-    logger.info('processing %d databases', len(dbs))
-    cname = str('_'.join(pp.parts[1:]))  # meh
-    yield from _index_dbs(dbs, cachew_name=cname)
-
-
-
-def _index_dbs(dbs: List[Path], cachew_name: str):
-    # TODO right... not ideal, need to think how to handle it properly...
-    import sys
-    sys.setrecursionlimit(5000)
-
-    cache_dir = config.get().cache_dir
-    cpath = None if cache_dir is None else cache_dir / cachew_name
-    emitted: Set = set()
-    yield from _index_dbs_aux(cpath, dbs, emitted=emitted)
-
-
-# todo wow, stack traces are ridiculous here...
-# todo hmm, feels like it should be a class or something?
-@cachew(lambda cp, dbs, emitted: cp, depends_on=lambda cp, dbs, emitted: dbs)  # , logger=logger)
-def _index_dbs_aux(cache_path: Path, dbs: List[Path], emitted: Set) -> Results:
-    if len(dbs) == 0:
+    if p is None:
+        from my.browser.all import history
+        yield from _index_new(history())
         return
 
-
-
-
-
-    xs_was_cached = False
-    for r in xs_res:
-        # if it was cached, emitted would be empty
-        if len(emitted) == 0:
-            xs_was_cached = True
-            logger.debug('seems that %d first items were previously cached', len(xs))
-        if xs_was_cached:
-            key = (r.url, r.dt)
-            assert key not in emitted, key  # todo not sure if this assert is necessary?
-            # hmm ok it might happen if we messed up with indexing individual db?
-            # alternatively, could abuse it to avoid messing with 'emitted' in _index_db?
-            emitted.add(key)
-        yield r  # todo not sure about exceptions?
-
-    for db in x:
-        yield from _index_db(db, emitted=emitted)
-
-
-def _index_db(db: Path, emitted: Set):
-    logger.info('processing %s', db)  # debug level?
-
-    # todo schema check (not so critical for cachew though)
-    total = 0
-    new = 0
-    loc = Loc.file(db)  # todo possibly needs to be optimized -- moving from within the loop considerably speeds everything up
-    with sqlite3.connect(f'file:{db}?immutable=1', uri=True) as c:
-        browser = None
-        for b in [Chrome, Firefox, FirefoxPhone, Safari]:
-            try:
-                c.execute(f'SELECT * FROM {b.detector}')
-            except sqlite3.OperationalError:  # not sure if the right kind?
-                pass
-            else:
-                browser = b
-                break
-        assert browser is not None
-
-        proj = ', '.join(c for c, _ in browser.schema.cols)
-        query = browser.query.replace('chunk.', '')
-
-        c.row_factory = sqlite3.Row
-        for r in c.execute(f'select {proj} {query}'):
-            v = browser.row2visit(r, loc)
-            total += 1
-
-            key = (v.url, v.dt)
-            # todo how to keep keys compatible?
-            if key in emitted:
-                continue
-            yield v
-            emitted.add(key)
-            new += 1
-
-    # eh, ok, almost 2x faster if I don't construct Visit first
-    # maybe it's Loc.file that's too slow?
-    # yeah, seems like it, 4.1 s after computing it only once
-
-    logger.info('%s: %d/%d new visits', db, new, total)
-
-
-Col = str
-ColType = str
-
-
-from typing import Any, NamedTuple, Tuple, Union, Sequence, Optional
-
-class Schema(NamedTuple):
-    cols: Sequence[Tuple[Col, ColType]]
-    key: Sequence[str]
-
-
-SchemaCheck = Tuple[str, Union[str, Sequence[str]]]  # todo Union: meh
-
-from dataclasses import dataclass
-
-# todo protocol?
-@dataclass
-class Extr:
-    detector: str
-    schema_check: SchemaCheck
-    schema: Schema
-    query: str
-
-    # todo calllable?
-    @staticmethod
-    def row2visit(row: sqlite3.Row, loc: Loc) -> Visit:
-        raise NotImplementedError
-
-
-class Chrome(Extr):
-    detector='keyword_search_terms'
-    schema_check=(
-        'visits', [
-            'visits', "id, url, visit_time, from_visit, transition, segment_id, visit_duration, incremented_omnibox_typed_score",
-            'visits', "id, url, visit_time, from_visit, transition, segment_id, visit_duration"
-        ]
-    )
-    schema=Schema(cols=[
-        ('U.url' , 'TEXT' ),
-
-        # while these two are not very useful, might be good to have just in case for some debugging
-        ('U.id AS urlid' , 'INTEGER'),
-        ('V.id AS vid' , 'INTEGER'),
-
-        ('V.visit_time' , 'INTEGER NOT NULL'),
-        ('V.from_visit' , 'INTEGER' ),
-        ('V.transition' , 'INTEGER NOT NULL'),
-        # V.segment_id looks useless
-        ('V.visit_duration' , 'INTEGER NOT NULL'),
-        # V.omnibox thing looks useless
-    ], key=('url', 'visit_time', 'vid', 'urlid'))
-    query='FROM chunk.visits as V, chunk.urls as U WHERE V.url = U.id'
-
-    @staticmethod
-    def row2visit(row: sqlite3.Row, loc: Loc) -> Visit:
-        url = row['url']
-        ts = row['visit_time']
-        durs = row['visit_duration']
-
-        dt = chrome_time_to_utc(int(ts))
-        url = unquote(url)  # chrome urls are all quoted
-        dd = int(durs)
-        dur: Optional[Second] = None if dd == 0 else dd // 1_000_000
-        return Visit(
-            url=url,
-            dt=dt,
-            locator=loc,
-            duration=dur,
-        )
-
-
-# should be utc? https://stackoverflow.com/a/26226771/706389
-# yep, tested it and looks like utc
-def chrome_time_to_utc(chrome_time: int) -> datetime:
-    epoch = (chrome_time / 1_000_000) - 11644473600
-    return datetime.fromtimestamp(epoch, pytz.utc)
-
-
-def _row2visit_firefox(row: sqlite3.Row, loc: Loc) -> Visit:
-    url = row['url']
-    ts = float(row['visit_date'])
-    # ok, looks like it's unix epoch
-    # https://stackoverflow.com/a/19430099/706389
-
-    # NOTE: ugh. on Fenix (experimental Android version) it uses milliseconds, not nanos...
-    # about year 2001... if someone has browser history exports before that -- please let me know, I'm impressed
-    threshold = 1000000000
-    if ts > threshold * 1_000_000:
-        # presumably it's in microseconds
-        ts /= 1_000_000
-    else:
-        # milliseconds
-        ts /= 1_000
-    dt = datetime.fromtimestamp(ts, pytz.utc)
-    url = unquote(url)  # firefox urls are all quoted
-    return Visit(
-        url=url,
-        dt=dt,
-        locator=loc,
-    )
-
-# https://web.archive.org/web/20201026130310/http://fileformats.archiveteam.org/wiki/History.db
-class Safari(Extr):
-    detector='history_tombstones'
-    schema_check=(
-        'history_visits', [
-            'history_visits', "id, history_item, visit_time",
-            'history_items', "id, url"
-        ]
+    warnings.warn(
+        f'Passing paths to promnesia.sources.browser is deprecated, you should setup my.browser.export instead. '
+        f'See https://github.com/seanbreckenridge/browserexport#hpi .'
+        f'Will try to hack path to browser databases {p} into HPI config.'
     )
-
-    (
-
-
-    (
-    ('
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    try:
+        yield from _index_new_with_adhoc_config(path=p)
+        return
+    except Exception as e:
+        logger.exception(e)
+        warnings.warn("Hacking my.config.browser.export didn't work. You probably need to update HPI.")
+
+    logger.warning("Falling back onto legacy promnesia.sources.browser_legacy module")
+    yield from _index_old(path=p)
+
+
+def _index_old(*, path: PathIsh) -> Results:
+    from . import browser_legacy
+    yield from browser_legacy.index(path)
+
+
+def _index_new_with_adhoc_config(*, path: PathIsh) -> Results:
+    from . import hpi
+
+    ## previously, it was possible to index be called with multiple different db search paths
+    ## this would result in each subsequent call to my.browser.export.history to invalidate cache every time
+    ## so we hack cachew path so it's different for each call
+    from my.core.core_config import config as hpi_core_config
+    hpi_cache_dir = hpi_core_config.get_cache_dir()
+    sanitized_path = re.sub(r'\W', '_', str(path))
+    cache_override = None if hpi_cache_dir is None else hpi_cache_dir / sanitized_path
+    ##
+
+    from my.core.common import classproperty, Paths, get_files
+    class config:
+        class core:
+            cache_dir = cache_override
+
+        class browser:
+            class export:
+                @classproperty
+                def export_path(cls) -> Paths:
+                    return tuple([f for f in get_files(path, glob='**/*') if is_sqlite_db(f)])
+
+    from my.core.cfg import tmp_config
+    with tmp_config(modules='my.browser.export|my.core.core_config', config=config):
+        from my.browser.export import history
+        yield from _index_new(history())
+
+
+if TYPE_CHECKING:
+    from browserexport.merge import Visit as BrowserMergeVisit
+else:
+    BrowserMergeVisit = Any
+
+
+def _index_new(history: Iterator[BrowserMergeVisit]) -> Results:
+    for v in history:
+        desc: Optional[str] = None
+        duration: Optional[Second] = None
+        metadata = v.metadata
+        if metadata is not None:
+            desc = metadata.title
+            duration = metadata.duration
+        yield Visit(
+            url=v.url,
+            dt=v.dt,
+            locator=Loc(title=desc or v.url, href=v.url),
+            duration=duration,
         )
-
-# https://web.archive.org/web/20190730231715/https://www.forensicswiki.org/wiki/Mozilla_Firefox_3_History_File_Format#moz_historyvisits
-class Firefox(Extr):
-    detector='moz_meta'
-    schema_check=('moz_historyvisits', "id, from_visit, place_id, visit_date, visit_type")
-    schema=Schema(cols=[
-        ('P.url' , 'TEXT'),
-
-        ('P.id AS pid' , 'INTEGER'),
-        ('V.id AS vid' , 'INTEGER'),
-
-        ('V.from_visit', 'INTEGER'),
-        ('V.visit_date', 'INTEGER'),
-        ('V.visit_type', 'INTEGER'),
-
-        # not sure what session is form but could be useful?..
-        # NOTE(20210410): for now, commented it out since some older databases from phone have this column commented?
-        # needs to be defensive
-        # ('V.session' , 'INTEGER'),
-    ], key=('url', 'visit_date', 'vid', 'pid'))
-    query='FROM chunk.moz_historyvisits as V, chunk.moz_places as P WHERE V.place_id = P.id'
-
-    row2visit = _row2visit_firefox
-
-
-class FirefoxPhone(Extr):
-    detector='remote_devices'
-    schema_check=('visits', "_id, history_guid, visit_type, date, is_local")
-    schema=Schema(cols=[
-        ('H.url' , 'TEXT NOT NULL' ),
-
-        ('H.guid AS guid' , 'TEXT' ),
-        ('H._id AS hid' , 'INTEGER' ),
-        ('V._id AS vid' , 'INTEGER' ),
-
-        ('V.visit_type' , 'INTEGER NOT NULL'),
-        ('V.date as visit_date', 'INTEGER NOT NULL'),
-        # ('is_local' , 'INTEGER NOT NULL'),
-    ], key=('url', 'date', 'vid', 'hid'))
-    query='FROM chunk.visits as V, chunk.history as H WHERE V.history_guid = H.guid'
-
-    row2visit = _row2visit_firefox
```