bajo-extra 0.2.11 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bajo/config.json CHANGED
@@ -10,7 +10,7 @@
10
10
  }
11
11
  },
12
12
  "import": {
13
- "maxBatch": 100
13
+ "maxBatch": 1000
14
14
  }
15
15
  }
16
16
  }
@@ -1,6 +1,6 @@
1
1
  import path from 'path'
2
2
  import scramjet from 'scramjet'
3
- import format from 'ndjson-csv-xlsx'
3
+ import format from '../../lib/ndjson-csv-xlsx.js'
4
4
  import { createGzip } from 'zlib'
5
5
 
6
6
  const { json, ndjson, csv, xlsx } = format
@@ -14,7 +14,7 @@ async function getFile (dest, ensureDir) {
14
14
  let file
15
15
  if (path.isAbsolute(dest)) file = dest
16
16
  else {
17
- file = `${getPluginDataDir('bajoDb')}/export/${dest}`
17
+ file = `${getPluginDataDir('bajoExtra')}/export/${dest}`
18
18
  fs.ensureDirSync(path.dirname(file))
19
19
  }
20
20
  file = increment(file, { fs: true })
@@ -10,24 +10,34 @@ async function handler (rec, bulk) {
10
10
  save.checkUnique = save.checkUnique ?? 'id'
11
11
  if (['unique', 'upsert'].includes(save.mode)) {
12
12
  const query = isFunction(save.checkUnique) ? await save.checkUnique.call(this, rec, save) : set({}, save.checkUnique, rec[save.checkUnique])
13
- const resp = await recordFind(save.coll, { query, limit: 1 }, { skipCache: true })
13
+ const resp = await recordFind(save.coll, { query, limit: 1 }, { noCache: true })
14
14
  if (resp.length > 0) existing = resp[0]
15
15
  }
16
16
  if (existing) {
17
17
  if (save.mode === 'upsert') {
18
18
  const body = save.updateConverter ? await save.updateConverter.call(this, rec, save) : rec
19
- record = await recordUpdate(save.coll, existing.id, body)
20
- method = 'updated'
19
+ try {
20
+ record = await recordUpdate(save.coll, existing.id, body)
21
+ method = 'updated'
22
+ } catch (err) {
23
+ console.error(err)
24
+ method = 'error'
25
+ }
21
26
  } else {
22
27
  method = 'skipped'
23
28
  }
24
29
  } else {
25
- record = await recordCreate(save.coll, rec)
26
- method = 'created'
30
+ try {
31
+ record = await recordCreate(save.coll, rec)
32
+ method = 'created'
33
+ } catch (err) {
34
+ console.error(err)
35
+ method = 'error'
36
+ }
27
37
  }
28
38
  if (record && current.coll && current.query) {
29
39
  const query = await current.query.call(this, { body: rec, record, opts: save })
30
- const recs = await recordFind(current.coll, { query }, { skipCache: true })
40
+ const recs = await recordFind(current.coll, { query }, { noCache: true })
31
41
  const rc = current.converter ? await current.converter.call(this, { body: rec, record, opts: save }) : rec
32
42
  if (rc) {
33
43
  if (recs.length > 0) {
@@ -42,10 +52,14 @@ async function handler (rec, bulk) {
42
52
  }
43
53
 
44
54
  async function fetchAndSave ({ url, bulk, save = {}, opts = {} } = {}) {
45
- const { importPkg } = this.bajo.helper
55
+ const { importPkg, getConfig, importModule } = this.bajo.helper
46
56
  const { fetchBulk } = this.bajoExtra.helper
47
57
  const { merge } = await importPkg('lodash-es')
48
58
  merge(bulk, { handler, save })
59
+ const cfgDb = getConfig('bajoDb', { full: true })
60
+ const start = await importModule(`${cfgDb.dir.pkg}/bajo/start.js`)
61
+ await start.call(this, 'all')
62
+
49
63
  await fetchBulk(url, bulk, opts)
50
64
  }
51
65
 
@@ -18,6 +18,7 @@ async function fetching ({ url, opts, bulk, spin }) {
18
18
  let count = 0
19
19
  const stat = { created: 0, updated: 0, skipped: 0, error: 0 }
20
20
  bulk.dataKey = bulk.dataKey ?? 'data'
21
+ if (bulk.printCount === true) bulk.printCount = 100
21
22
  const data = isFunction(bulk.dataKey) ? await bulk.dataKey.call(this, resp) : resp[bulk.dataKey]
22
23
  if (data.length === 0) {
23
24
  print.warn('No records to process, abort')
@@ -43,7 +44,7 @@ async function fetching ({ url, opts, bulk, spin }) {
43
44
  }
44
45
  }
45
46
  print.succeed('[%s] %d/%d records processed', spin.getElapsed(), count, data.length)
46
- print.succeed('[%s] Created: %d, Updated: %d, Skipped: %d', spin.getElapsed(), stat.created, stat.updated, stat.skipped)
47
+ if (!bulk.noStat) print.succeed('[%s] Created: %d, Updated: %d, Skipped: %d', spin.getElapsed(), stat.created, stat.updated, stat.skipped)
47
48
  return data.length
48
49
  }
49
50
 
@@ -53,7 +54,7 @@ async function fetchBulk (url, bulk = {}, opts = {}) {
53
54
  opts.params = opts.params ?? {}
54
55
  bulk.maxStep = bulk.maxStep ?? 0
55
56
  if (!isFunction(bulk.handler)) throw error('A function handler must be provided')
56
- if (bulk.paramsIncFn && isFunction(bulk.ParamsFn)) {
57
+ if (isFunction(bulk.paramsIncFn)) {
57
58
  print.info('Bulk fetch starting')
58
59
  const spin = spinner({ showCounter: true }).start('Fetching starts...')
59
60
  let step = 1
@@ -1,28 +1,29 @@
1
1
  import path from 'path'
2
2
  import scramjet from 'scramjet'
3
- import format from 'ndjson-csv-xlsx'
3
+ import format from '../../lib/ndjson-csv-xlsx.js'
4
4
  import { createGunzip } from 'zlib'
5
5
 
6
6
  const { json, ndjson, csv, xlsx } = format
7
7
  const { DataStream } = scramjet
8
8
  const supportedExt = ['.json', '.jsonl', '.ndjson', '.csv', '.xlsx']
9
9
 
10
- async function importFrom (source, dest, { trashOld = true, batch, progressFn, useHeader = true } = {}) {
11
- const { error, importPkg, getConfig, getPluginDataDir } = this.bajo.helper
10
+ async function importFrom (source, dest, { trashOld = true, batch = 1, progressFn, converterFn, useHeader = true, fileType, createOpts = {} } = {}, opts = {}) {
11
+ const { error, importPkg, getConfig, getPluginDataDir, secToHms } = this.bajo.helper
12
12
  if (!this.bajoDb) throw error('Bajo DB isn\'t loaded')
13
13
  const { getInfo, recordClear, recordCreate } = this.bajoDb.helper
14
14
  await getInfo(dest)
15
+ const { merge } = await importPkg('lodash-es')
15
16
  const fs = await importPkg('fs-extra')
16
17
  const cfg = getConfig('bajoExtra')
17
18
 
18
19
  let file
19
20
  if (path.isAbsolute(source)) file = source
20
21
  else {
21
- file = `${getPluginDataDir('bajoDb')}/import/${source}`
22
+ file = `${getPluginDataDir('bajoExtra')}/import/${source}`
22
23
  fs.ensureDirSync(path.dirname(file))
23
24
  }
24
25
  if (!fs.existsSync(file)) throw error('Source file \'%s\' doesn\'t exist', file)
25
- let ext = path.extname(file)
26
+ let ext = fileType ? `.${fileType}` : path.extname(file)
26
27
  let decompress = false
27
28
  if (ext === '.gz') {
28
29
  ext = path.extname(path.basename(file, '.gz'))
@@ -31,16 +32,16 @@ async function importFrom (source, dest, { trashOld = true, batch, progressFn, u
31
32
  if (!supportedExt.includes(ext)) throw error('Unsupported format \'%s\'', ext.slice(1))
32
33
  if (trashOld) await recordClear(dest)
33
34
  const reader = fs.createReadStream(file)
34
- batch = parseInt(batch) ?? 100
35
+ batch = parseInt(batch) || 100
35
36
  if (batch > cfg.stream.import.maxBatch) batch = cfg.stream.import.maxBatch
36
37
  if (batch < 0) batch = 1
37
38
  let count = 0
38
39
  const pipes = [reader]
39
40
  if (decompress) pipes.push(createGunzip())
40
- if (ext === '.json') pipes.push(json.parse())
41
- else if (['.ndjson', '.jsonl'].includes(ext)) pipes.push(ndjson.parse())
42
- else if (ext === '.csv') pipes.push(csv.parse({ headers: useHeader }))
43
- else if (ext === '.xlsx') pipes.push(xlsx.parse({ header: useHeader }))
41
+ if (ext === '.json') pipes.push(json.parse(opts))
42
+ else if (['.ndjson', '.jsonl'].includes(ext)) pipes.push(ndjson.parse(opts))
43
+ else if (ext === '.csv') pipes.push(csv.parse(merge({ headers: useHeader }, opts)))
44
+ else if (ext === '.xlsx') pipes.push(xlsx.parse(merge({ header: useHeader }, opts)))
44
45
 
45
46
  const stream = DataStream.pipeline(...pipes)
46
47
  let batchNo = 1
@@ -48,13 +49,15 @@ async function importFrom (source, dest, { trashOld = true, batch, progressFn, u
48
49
  .batch(batch)
49
50
  .map(async items => {
50
51
  if (items.length === 0) return null
51
- if (progressFn) await progressFn.call(this, { batchNo, data: items })
52
- for (let i = 0; i < items.length; i++) {
52
+ const start = Date.now()
53
+ for (let item of items) {
53
54
  count++
54
- await recordCreate(dest, items[i])
55
+ item = converterFn ? await converterFn.call(this, item) : item
56
+ await recordCreate(dest, item, createOpts)
55
57
  }
58
+ const diff = Date.now() - start
59
+ if (progressFn) await progressFn.call(this, { batchNo, data: items, time: secToHms(diff, true), timeMsec: diff })
56
60
  batchNo++
57
- return null
58
61
  })
59
62
  .run()
60
63
 
@@ -33,7 +33,7 @@ async function importFrom ({ path, args }) {
33
33
  default: false
34
34
  })
35
35
  if (!answer) return print.fail('Aborted!', { exit: config.tool })
36
- const spin = spinner().start('Importing...')
36
+ const spin = spinner({ showCounter: true }).start('Importing...')
37
37
  const progressFn = makeProgress.call(this, spin)
38
38
  const cfg = getConfig('bajoDb', { full: true })
39
39
  const { batch } = getConfig()
@@ -0,0 +1,35 @@
1
+ // Borrowed from: https://github.com/fanlia/ndjson-csv-xlsx/blob/main/index.js
2
+
3
+ import ndjson from 'ndjson'
4
+ import csv from 'fast-csv'
5
+ import xlsxparse from 'xlsx-parse-stream'
6
+ import XLSXWriteStream from '@atomictech/xlsx-write-stream'
7
+ import StreamArray from 'stream-json/streamers/StreamArray.js'
8
+ import stringer from 'stream-json/Stringer.js'
9
+ import disassembler from 'stream-json/Disassembler.js'
10
+ import chain from 'stream-chain'
11
+
12
+ export default {
13
+ ndjson: {
14
+ parse: (...args) => ndjson.parse(...args),
15
+ stringify: (...args) => ndjson.stringify(...args)
16
+ },
17
+ csv: {
18
+ parse: (...args) => csv.parse(...args),
19
+ stringify: (...args) => csv.format(...args)
20
+ },
21
+ xlsx: {
22
+ parse: (...args) => xlsxparse(...args),
23
+ stringify: (...args) => new XLSXWriteStream(...args)
24
+ },
25
+ json: {
26
+ parse: (...args) => chain([
27
+ StreamArray.withParser(...args),
28
+ data => data.value
29
+ ]),
30
+ stringify: (options, ...args) => chain([
31
+ disassembler(),
32
+ stringer({ ...options, makeArray: true })
33
+ ])
34
+ }
35
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bajo-extra",
3
- "version": "0.2.11",
3
+ "version": "0.2.13",
4
4
  "description": "Extra package for Bajo Framework",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -26,16 +26,21 @@
26
26
  },
27
27
  "homepage": "https://github.com/ardhi/bajo-extra#readme",
28
28
  "dependencies": {
29
+ "@atomictech/xlsx-write-stream": "^2.0.2",
29
30
  "async": "^3.2.4",
30
31
  "axios": "^1.4.0",
31
32
  "bcrypt": "^5.1.1",
32
33
  "email-addresses": "^5.0.0",
34
+ "fast-csv": "^5.0.1",
33
35
  "fast-jwt": "^3.2.0",
34
36
  "fast-xml-parser": "^4.3.2",
35
37
  "littlehash": "^1.0.1",
36
- "ndjson-csv-xlsx": "^1.1.1",
38
+ "ndjson": "^2.0.0",
37
39
  "performant-array-to-tree": "^1.11.0",
38
40
  "query-string": "^8.1.0",
39
- "scramjet": "^4.36.9"
41
+ "scramjet": "^4.36.9",
42
+ "stream-chain": "^2.2.5",
43
+ "stream-json": "^1.8.0",
44
+ "xlsx-parse-stream": "^1.1.0"
40
45
  }
41
46
  }