@joystick.js/db-canary 0.0.0-canary.2271 → 0.0.0-canary.2273
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/server/index.js +1 -1
- package/dist/server/lib/bulk_insert_optimizer.js +1 -0
- package/dist/server/lib/memory_efficient_bulk_insert.js +1 -0
- package/package.json +10 -4
- package/src/server/index.js +3 -1
- package/src/server/lib/bulk_insert_optimizer.js +559 -0
- package/src/server/lib/memory_efficient_bulk_insert.js +262 -0
- package/test_runner.js +353 -0
- package/tests/client/index.test.js +3 -1
- package/tests/performance/bulk_insert_1m_test.js +113 -0
- package/tests/performance/bulk_insert_benchmarks.test.js +570 -0
- package/tests/performance/bulk_insert_enterprise_isolated.test.js +469 -0
- package/tests/performance/bulk_insert_enterprise_scale_test.js +216 -0
- package/tests/server/integration/authentication_integration.test.js +3 -1
- package/tests/server/integration/development_mode_authentication.test.js +3 -1
- package/tests/server/integration/production_safety_integration.test.js +3 -1
- package/tests/server/lib/bulk_insert_optimizer.test.js +523 -0
- package/tests/server/lib/operations/admin.test.js +3 -1
package/dist/server/index.js
CHANGED
@@ -1 +1 @@
-
import S from"net";import{decode as k}from"msgpackr";import x from"./lib/op_types.js";import g from"./lib/safe_json_parse.js";import{load_settings as l,get_settings as m,get_port_configuration as
+
import S from"net";import{decode as k}from"msgpackr";import x from"./lib/op_types.js";import g from"./lib/safe_json_parse.js";import{load_settings as l,get_settings as m,get_port_configuration as c}from"./lib/load_settings.js";import{send_error as _}from"./lib/send_response.js";import{start_cluster as T}from"./cluster/index.js";import h from"./lib/logger.js";import{initialize_database as z,cleanup_database as O}from"./lib/query_engine.js";import{create_message_parser as I,encode_message as w}from"./lib/tcp_protocol.js";import{create_connection_manager as R}from"./lib/connection_manager.js";import{shutdown_write_queue as C}from"./lib/write_queue.js";import{setup_authentication as E,verify_password as q,get_client_ip as N,is_rate_limited as $,initialize_auth_manager as A,reset_auth_state as B}from"./lib/auth_manager.js";import{initialize_api_key_manager as D}from"./lib/api_key_manager.js";import{is_development_mode as y,display_development_startup_message as F,warn_undefined_node_env as J}from"./lib/development_mode.js";import{restore_backup as K,start_backup_schedule as P,stop_backup_schedule as j}from"./lib/backup_manager.js";import{initialize_replication_manager as G,shutdown_replication_manager as M}from"./lib/replication_manager.js";import{initialize_write_forwarder as W,shutdown_write_forwarder as H}from"./lib/write_forwarder.js";import{handle_database_operation as U,handle_admin_operation as V,handle_ping_operation as Y}from"./lib/operation_dispatcher.js";import{start_http_server as L,stop_http_server as Q}from"./lib/http_server.js";import{create_recovery_token as X,initialize_recovery_manager as v,reset_recovery_state as Z}from"./lib/recovery_manager.js";import{has_settings as ee}from"./lib/load_settings.js";const i=new Set;let s=null;const re=e=>e&&e.password,d=e=>({ok:0,error:e}),te=()=>({ok:1,version:"1.0.0",message:"Authentication successful"}),u=(e,r)=>{const t=w(r);e.write(t),e.end()},p=(e,r)=>{const t=w(r);e.write(t)},ne=async(e,r={})=>{if(!re(r)){const t=d("Authentication operation requires password to be set in data.");u(e,t);return}try{const t=N(e);if($(t)){const a=d("Too many failed attempts. Please try again later.");u(e,a);return}if(!await q(r.password,t)){const a=d("Authentication failed");u(e,a);return}i.add(e.id);const o=te();p(e,o)}catch(t){const n=d(`Authentication error: ${t.message}`);u(e,n)}},oe=e=>({ok:1,password:e,message:"Authentication setup completed successfully. 
Save this password - it will not be shown again."}),ae=e=>({ok:0,error:`Setup error: ${e}`}),se=async(e,r={})=>{try{const t=E(),n=oe(t);p(e,n)}catch(t){const n=ae(t.message);p(e,n)}},ie=(e="")=>{if(!e)throw new Error("Must pass an op type for operation.");return x.includes(e)},ce=e=>g(e),_e=e=>{try{const r=k(e);return typeof r=="string"?g(r):r}catch{return null}},ar=e=>{try{return typeof e=="string"?ce(e):Buffer.isBuffer(e)?_e(e):e}catch{return null}},f=e=>y()?!0:i.has(e.id),pe=async(e,r)=>{if(e?.restore_from)try{r.info("Startup restore requested",{backup_filename:e.restore_from});const t=await K(e.restore_from);r.info("Startup restore completed",{backup_filename:e.restore_from,duration_ms:t.duration_ms});const n={...e};delete n.restore_from,process.env.JOYSTICK_DB_SETTINGS=JSON.stringify(n),l(),r.info("Removed restore_from from settings after successful restore")}catch(t){r.error("Startup restore failed",{backup_filename:e.restore_from,error:t.message}),r.info("Continuing with fresh database after restore failure")}},de=()=>{try{return l(),m()}catch{return null}},ue=async e=>{const{tcp_port:r}=c(),t=e?.data_path||`./.joystick/data/joystickdb_${r}`;z(t),A(),await D(),v()},le=e=>{try{G(),e.info("Replication manager initialized")}catch(r){e.warn("Failed to initialize replication manager",{error:r.message})}},me=e=>{try{W(),e.info("Write forwarder initialized")}catch(r){e.warn("Failed to initialize write forwarder",{error:r.message})}},fe=(e,r)=>{if(e?.s3)try{P(),r.info("Backup scheduling started")}catch(t){r.warn("Failed to start backup scheduling",{error:t.message})}},ge=async(e,r)=>{try{const t=await L(e);return t&&r.info("HTTP server started",{http_port:e}),t}catch(t){return r.warn("Failed to start HTTP server",{error:t.message}),null}},he=()=>{if(y()){const{tcp_port:e,http_port:r}=c();F(e,r)}else J()},we=()=>R({max_connections:1e3,idle_timeout:600*1e3,request_timeout:5*1e3}),ye=async(e,r,t,n)=>{s.update_activity(e.id);try{const o=t.parse_messages(r);for(const a of o)await ve(e,a,r.length,n)}catch(o){n.error("Message parsing failed",{client_id:e.id,error:o.message}),_(e,{message:"Invalid message format"}),e.end()}},ve=async(e,r,t,n)=>{const o=r,a=o?.op||null;if(!a){_(e,{message:"Missing operation type"});return}if(!ie(a)){_(e,{message:"Invalid operation type"});return}const b=s.create_request_timeout(e.id,a);try{await be(e,a,o,t)}finally{clearTimeout(b)}},be=async(e,r,t,n)=>{const o=t?.data||{};switch(r){case"authentication":await ne(e,o);break;case"setup":await se(e,o);break;case"insert_one":case"update_one":case"delete_one":case"delete_many":case"bulk_write":case"find_one":case"find":case"count_documents":case"create_index":case"drop_index":case"get_indexes":await U(e,r,o,f,n,s,i);break;case"ping":Y(e);break;case"admin":await V(e,o,f,s,i);break;case"reload":await Se(e);break;default:_(e,{message:`Operation ${r} not implemented`})}},Se=async e=>{if(!f(e)){_(e,{message:"Authentication required"});return}try{const r=ke(),t=await xe(),n=Te(r,t);p(e,n)}catch(r){const t={ok:0,error:`Reload operation failed: ${r.message}`};p(e,t)}},ke=()=>{try{return m()}catch{return null}},xe=async()=>{try{return await l(),m()}catch{return{port:1983,authentication:{}}}},Te=(e,r)=>({ok:1,status:"success",message:"Configuration reloaded successfully",changes:{port_changed:e?e.port!==r.port:!1,authentication_changed:e?e.authentication?.password_hash!==r.authentication?.password_hash:!1},timestamp:new Date().toISOString()}),ze=(e,r)=>{r.info("Client 
disconnected",{socket_id:e.id}),i.delete(e.id),s.remove_connection(e.id)},Oe=(e,r,t)=>{t.error("Socket error",{socket_id:e.id,error:r.message}),i.delete(e.id),s.remove_connection(e.id)},Ie=(e,r,t)=>{e.on("data",async n=>{await ye(e,n,r,t)}),e.on("end",()=>{ze(e,t)}),e.on("error",n=>{Oe(e,n,t)})},Re=(e,r)=>{if(!s.add_connection(e))return;const t=I();Ie(e,t,r)},Ce=()=>async()=>{try{await Q(),j(),await M(),await H(),s&&s.shutdown(),i.clear(),await C(),await new Promise(e=>setTimeout(e,100)),await O(),B(),Z()}catch{}},sr=async()=>{const{create_context_logger:e}=h("server"),r=e(),t=de();await pe(t,r),await ue(t),le(r),me(r),fe(t,r),s=we();const{http_port:n}=c();await ge(n,r),he();const o=S.createServer((a={})=>{Re(a,r)});return o.cleanup=Ce(),o},Ee=e=>{try{v();const r=X();console.log("Emergency Recovery Token Generated"),console.log(`Visit: ${r.url}`),console.log("Token expires in 10 minutes"),e.info("Recovery token generated via CLI",{expires_at:new Date(r.expires_at).toISOString()}),process.exit(0)}catch(r){console.error("Failed to generate recovery token:",r.message),e.error("Recovery token generation failed",{error:r.message}),process.exit(1)}},qe=()=>{const{tcp_port:e}=c();return{worker_count:process.env.WORKER_COUNT?parseInt(process.env.WORKER_COUNT):void 0,port:e,environment:process.env.NODE_ENV||"development"}},Ne=(e,r)=>{const{tcp_port:t,http_port:n}=c(),o=ee();r.info("Starting JoystickDB server...",{workers:e.worker_count||"auto",tcp_port:t,http_port:n,environment:e.environment,has_settings:o,port_source:o?"JOYSTICK_DB_SETTINGS":"default"})};if(import.meta.url===`file://${process.argv[1]}`){const{create_context_logger:e}=h("main"),r=e();process.argv.includes("--generate-recovery-token")&&Ee(r);const t=qe();Ne(t,r),T(t)}export{ne as authentication,ie as check_op_type,sr as create_server,ar as parse_data,se as setup};
package/dist/server/lib/bulk_insert_optimizer.js
ADDED
@@ -0,0 +1 @@
+
import{get_database as f,build_collection_key as b,check_and_grow_map_size as v}from"./query_engine.js";import{get_write_queue as B}from"./write_queue.js";import"./auto_index_manager.js";import R from"./logger.js";const{create_context_logger:w}=R("bulk_insert_optimizer"),C=100*1024*1024,U=1e3,D=1e4,I=(e,r=100)=>{const t=e.slice(0,Math.min(r,e.length)),n=t.reduce((o,s)=>o+Buffer.byteLength(JSON.stringify(s),"utf8"),0);return Math.ceil(n/t.length)},P=(e,r)=>{const t=e*r,n=2,o=1024*1024*1024*10;return Math.max(t*n,o)},J=async e=>{const r=w();if(e.length===0)return;const t=I(e),n=P(e.length,t);r.info("Pre-allocating map size for bulk insert",{document_count:e.length,avg_document_size:t,required_map_size:n,required_map_size_gb:Math.round(n/(1024*1024*1024)*100)/100}),await v();const o=f();if(o.resize)try{o.resize(n),r.info("Map size pre-allocated successfully",{new_map_size:n,new_map_size_gb:Math.round(n/(1024*1024*1024)*100)/100})}catch(s){r.warn("Failed to pre-allocate map size",{error:s.message})}},O=(e,r=C)=>{const t=[];let n=[],o=0;for(const s of e){const i=Buffer.byteLength(JSON.stringify(s),"utf8");o+i>r&&n.length>0?(t.push(n),n=[s],o=i):(n.push(s),o+=i)}return n.length>0&&t.push(n),t},T=(()=>{let e=Date.now()*1e3;return()=>(++e).toString(36).padStart(12,"0")})(),A=(e,r,t)=>e.map(n=>({...n,_id:n._id||T()})).sort((n,o)=>{const s=b(r,t,n._id),i=b(r,t,o._id);return s.localeCompare(i)}),y=(e,r,t)=>{const n=new Date().toISOString();return e.map(o=>{const s=o._id||T(),i={...o,_id:s,_created_at:o._created_at||n,_updated_at:o._updated_at||n},l=JSON.stringify(i);return{key:b(r,t,s),value:l,document_id:s}})},E=async(e,r)=>{const t=[];return await e.transaction(()=>{for(const{key:n,value:o,document_id:s}of r){if(e.get(n))throw new Error(`Document with _id ${s} already exists`);e.put(n,o),t.push(s)}}),t},F=async function*(e,r,t,n=U){const o=f();for(let s=0;s<e.length;s+=n){const i=e.slice(s,s+n),l=y(i,r,t);yield await E(o,l),i.length=0,l.length=0;const c=Math.floor(s/n);e.length>=5e6?(c%5===0&&global.gc&&(global.gc(),await new Promise(a=>setTimeout(a,100))),await new Promise(a=>setImmediate(a))):e.length>=1e6?(c%8===0&&global.gc&&(global.gc(),await new Promise(a=>setTimeout(a,75))),await new Promise(a=>setImmediate(a))):e.length>1e5?(c%25===0&&global.gc&&(global.gc(),await new Promise(a=>setTimeout(a,25))),await new Promise(a=>setImmediate(a))):c%10===0&&await new Promise(a=>setImmediate(a))}},G=()=>!1,Z=e=>{},j=async(e,r)=>{w().debug("Index rebuilding skipped (not implemented)",{database:e,collection:r})},p=async(e,r,t,n={})=>{const{disable_indexing:o=!0,pre_allocate_map_size:s=!0,sort_keys:i=!0,stream_processing:l=!0,batch_size:_=U}=n,c=w(),a=Date.now(),h=process.memoryUsage();if(!e||!r)throw new Error("Database name and collection name are required");if(!Array.isArray(t)||t.length===0)throw new Error("Documents must be a non-empty array");c.info("Starting optimized bulk insert",{database:e,collection:r,document_count:t.length,options:n});let k=!1;try{s&&await J(t),o&&(k=G());let u=t;i&&(u=A(t,e,r));const m=[];let d=0;if(l)for await(const g of F(u,e,r,_))m.push(...g),d+=g.length,d%D===0&&c.info("Bulk insert progress",{processed:d,total:t.length,percentage:Math.round(d/t.length*100)});else{const g=O(u),q=f();for(const L of g){const N=y(L,e,r),S=await E(q,N);m.push(...S),d+=S.length,d%D===0&&c.info("Bulk insert progress",{processed:d,total:t.length,percentage:Math.round(d/t.length*100)})}}o&&await j(e,r);const 
z=Date.now(),M=process.memoryUsage(),x={duration_ms:z-a,documents_per_second:Math.round(t.length/((z-a)/1e3)),memory_delta_mb:Math.round((M.heapUsed-h.heapUsed)/(1024*1024)),peak_memory_mb:Math.round(M.heapUsed/(1024*1024))};return c.info("Optimized bulk insert completed",{database:e,collection:r,inserted_count:m.length,performance:x}),{acknowledged:!0,inserted_count:m.length,inserted_ids:m,performance:x}}catch(u){throw c.error("Optimized bulk insert failed",{database:e,collection:r,error:u.message}),u}finally{o&&Z(k)}},H=async(e,r,t,n={})=>{const{chunk_size:o=1e4}=n,s={acknowledged:!0,inserted_count:0,inserted_ids:[],performance:{duration_ms:0,documents_per_second:0,memory_delta_mb:0,peak_memory_mb:0}},i=Date.now();for(let _=0;_<t.length;_+=o){const c=t.slice(_,_+o),a=await p(e,r,c,n);s.inserted_count+=a.inserted_count,s.inserted_ids.push(...a.inserted_ids),await new Promise(h=>setImmediate(h))}const l=Date.now();return s.performance.duration_ms=l-i,s.performance.documents_per_second=Math.round(t.length/((l-i)/1e3)),s},V=async(e,r,t,n={})=>{const o=Date.now(),s=process.memoryUsage(),i=await p(e,r,t,n),l=Date.now(),_=process.memoryUsage();return{...i,performance:{...i.performance,total_duration_ms:l-o,memory_usage:{start_heap_mb:Math.round(s.heapUsed/(1024*1024)),end_heap_mb:Math.round(_.heapUsed/(1024*1024)),delta_heap_mb:Math.round((_.heapUsed-s.heapUsed)/(1024*1024)),peak_heap_mb:Math.round(_.heapUsed/(1024*1024))}}}},$=async(e,r,t,n={})=>{const o=B(),s={operation:"bulk_insert_optimized",database:e,collection:r,document_count:t.length};return await o.enqueue_write_operation(()=>p(e,r,t,n),s)};export{$ as bulk_insert,p as bulk_insert_optimized,V as bulk_insert_with_metrics,I as calculate_average_document_size,P as calculate_bulk_map_size,O as create_size_based_batches,H as non_blocking_bulk_insert,y as pre_encode_documents,A as sort_documents_by_key};
package/dist/server/lib/memory_efficient_bulk_insert.js
ADDED
@@ -0,0 +1 @@
+
import{bulk_insert_optimized as z}from"./bulk_insert_optimizer.js";import k from"./logger.js";const{create_context_logger:U}=k("memory_efficient_bulk_insert"),M=async function*(r,n={}){const{batch_size:a=1e3,document_template:c="minimal",test_id:i=Date.now().toString(36)}=n;for(let s=0;s<r;s+=a){const d=Math.min(a,r-s),l=[];for(let _=0;_<d;_++){const e=s+_;let o;c==="minimal"?o={_id:`mem_${i}_${e.toString().padStart(8,"0")}`,idx:e,cat:e%50,val:e%1e3}:c==="medium"?o={_id:`mem_${i}_${e.toString().padStart(8,"0")}`,name:`Document ${e}`,index:e,category:`category_${e%100}`,active:e%2===0,priority:e%5,score:Math.random()*100,created_timestamp:Date.now()+e}:c==="large"&&(o={_id:`mem_${i}_${e.toString().padStart(8,"0")}`,name:`Large Document ${e}`,index:e,category:`category_${e%100}`,subcategory:`subcategory_${e%20}`,active:e%2===0,priority:e%5,score:Math.random()*100,created_timestamp:Date.now()+e,description:`This is a large document with index ${e} for performance testing purposes.`,metadata:{created_by:`user_${e%1e3}`,department:`dept_${e%50}`,project:`project_${e%200}`,tags:[`tag_${e%10}`,`tag_${(e+1)%10}`]},measurements:Array.from({length:5},(f,p)=>({timestamp:Date.now()+e+p,value:Math.random()*1e3}))}),l.push(o)}yield l,await new Promise(_=>setImmediate(_))}},x=async(r,n,a,c={})=>{const{generation_batch_size:i=1e3,insert_batch_size:s=250,document_template:d="minimal",disable_indexing:l=!0,pre_allocate_map_size:_=!0,sort_keys:e=!0}=c,o=U(),f=Date.now(),p=process.memoryUsage();o.info("Starting memory-efficient bulk insert",{database:r,collection:n,document_count:a,generation_batch_size:i,insert_batch_size:s,document_template:d});const h=[];let b=0,m=0;try{for await(const w of M(a,{batch_size:i,document_template:d})){const $=await z(r,n,w,{disable_indexing:l,pre_allocate_map_size:m===0?_:!1,sort_keys:e,stream_processing:!0,batch_size:s});if(h.push(...$.inserted_ids),b+=$.inserted_count,m++,w.length=0,m%10===0){const t=process.memoryUsage();o.info("Memory-efficient bulk insert progress",{processed:b,total:a,percentage:Math.round(b/a*100),current_heap_mb:Math.round(t.heapUsed/(1024*1024)),batches_processed:m})}a>=1e7?(m%20===0&&global.gc&&(global.gc(),await new Promise(t=>setTimeout(t,25))),m%5===0&&await new Promise(t=>setImmediate(t))):a>=5e6?(m%10===0&&global.gc&&(global.gc(),await new Promise(t=>setTimeout(t,50))),m%2===0&&await new Promise(t=>setImmediate(t))):a>=1e6?(m%10===0&&global.gc&&(global.gc(),await new Promise(t=>setTimeout(t,50))),await new Promise(t=>setImmediate(t))):await new Promise(t=>setImmediate(t))}const g=Date.now(),u=process.memoryUsage(),y={duration_ms:g-f,documents_per_second:Math.round(a/((g-f)/1e3)),memory_usage:{start_heap_mb:Math.round(p.heapUsed/(1024*1024)),end_heap_mb:Math.round(u.heapUsed/(1024*1024)),delta_heap_mb:Math.round((u.heapUsed-p.heapUsed)/(1024*1024)),peak_heap_mb:Math.round(u.heapUsed/(1024*1024))}};return o.info("Memory-efficient bulk insert completed",{database:r,collection:n,inserted_count:h.length,performance:y}),{acknowledged:!0,inserted_count:h.length,inserted_ids:h,performance:y}}catch(g){throw o.error("Memory-efficient bulk insert failed",{database:r,collection:n,error:g.message}),g}},D=(r,n="minimal",a=1e3)=>{const c={minimal:50,medium:200,large:500},i=c[n]||c.minimal,s=Math.round(a*i/(1024*1024)),d=Math.round(r*i/(1024*1024)),l=s*3+100;return{avg_document_size_bytes:i,total_data_size_mb:d,batch_memory_mb:s,estimated_peak_memory_mb:l,recommended_batch_size:r>=1e7?2e3:r>=5e6?1e3:r>=1e6?750:1e3}};export{D as estimate_memory_usage,M as 
generate_documents_streaming,x as memory_efficient_bulk_insert};
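Note: the two minified modules above are the built output of the new bulk insert helpers (the source for bulk_insert_optimizer.js appears later in this diff; the source for memory_efficient_bulk_insert.js is not included in this section). A minimal usage sketch based on the exported names; the relative import path, database/collection names, counts, and option values are illustrative assumptions, not part of the diff:

// Sketch only; path, names, and counts are assumptions.
import {
  estimate_memory_usage,
  memory_efficient_bulk_insert
} from './lib/memory_efficient_bulk_insert.js';

const document_count = 1000000;

// Rough sizing before a run; templates are 'minimal', 'medium', or 'large'.
const estimate = estimate_memory_usage(document_count, 'medium', 1000);

// Generate synthetic documents in streaming batches and insert them.
const result = await memory_efficient_bulk_insert('analytics', 'events', document_count, {
  generation_batch_size: 1000,
  insert_batch_size: estimate.recommended_batch_size,
  document_template: 'medium'
});

console.log(result.inserted_count, result.performance.documents_per_second);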
package/package.json
CHANGED
@@ -1,16 +1,21 @@
  {
    "name": "@joystick.js/db-canary",
    "type": "module",
-   "version": "0.0.0-canary.
-   "canary_version": "0.0.0-canary.
+   "version": "0.0.0-canary.2273",
+   "canary_version": "0.0.0-canary.2272",
    "description": "JoystickDB - A minimalist database server for the Joystick framework",
    "main": "./dist/server/index.js",
    "scripts": {
      "build": "node ./.build/index.js",
      "release": "node increment_version.js && npm run build && npm publish",
      "start": "node src/server/index.js",
-     "test": "NODE_ENV=test ava --serial",
+     "test": "NODE_ENV=test NODE_OPTIONS='--expose-gc --max-old-space-size=8192' ava --serial --timeout=10m",
      "test:watch": "NODE_ENV=test ava --watch",
+     "test:performance": "NODE_ENV=test NODE_OPTIONS='--expose-gc --max-old-space-size=16384' ava --serial --timeout=30m tests/performance/*.test.js",
+     "test:enterprise": "NODE_ENV=test NODE_OPTIONS='--expose-gc --max-old-space-size=16384' ava --serial --timeout=30m tests/performance/bulk_insert_enterprise_*.test.js",
+     "test:benchmarks": "NODE_ENV=test NODE_OPTIONS='--expose-gc --max-old-space-size=16384' ava --serial --timeout=30m tests/performance/bulk_insert_benchmarks.test.js",
+     "test:1m": "NODE_ENV=test NODE_OPTIONS='--expose-gc --max-old-space-size=8192' ava --serial --timeout=15m tests/performance/bulk_insert_1m_test.js",
+     "test:runner": "node test_runner.js",
      "build:types": "tsc --declaration --emitDeclarationOnly --allowJs --outDir types src/**/*.js",
      "build:types:client": "tsc --declaration --emitDeclarationOnly --allowJs --outDir types/client src/client/*.js",
      "build:types:server": "tsc --declaration --emitDeclarationOnly --allowJs --outDir types/server src/server/**/*.js"
@@ -33,7 +38,8 @@
    },
    "ava": {
      "files": [
-       "tests/**/*.test.js"
+       "tests/**/*.test.js",
+       "!tests/performance/*.test.js"
      ],
      "verbose": true
    },
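Note: the new ava "files" pattern excludes tests/performance from the default npm test run, so those suites only execute via the dedicated test:performance, test:enterprise, test:benchmarks, and test:1m scripts. Those scripts also pass NODE_OPTIONS='--expose-gc', which is what makes the guarded global.gc() calls in the bulk insert code effective; a minimal sketch of that guard pattern (illustrative, not copied from the diff):

// Only meaningful when Node is started with --expose-gc (as the new scripts do
// via NODE_OPTIONS); otherwise global.gc is undefined and the call is skipped.
const force_gc = () => {
  if (typeof global.gc === 'function') {
    global.gc();
  }
};

force_gc();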
package/src/server/index.js
CHANGED
@@ -315,7 +315,9 @@ const load_server_settings = () => {
   * @param {Object} settings - Server settings
   */
  const initialize_server_components = async (settings) => {
-
+   // NOTE: Use proper .joystick/data path with port number as fallback, matching other database providers
+   const { tcp_port } = get_port_configuration();
+   const database_path = settings?.data_path || `./.joystick/data/joystickdb_${tcp_port}`;

    initialize_database(database_path);
    initialize_auth_manager();
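Illustratively, if get_port_configuration() reports tcp_port 1983 (the default port that appears elsewhere in the bundled server code) and settings has no data_path, the fallback resolves as in this sketch, which is not part of the diff:

// Sketch only: assumes { tcp_port: 1983 } and an empty settings object.
const settings = {};
const tcp_port = 1983;
const database_path = settings?.data_path || `./.joystick/data/joystickdb_${tcp_port}`;
// database_path === './.joystick/data/joystickdb_1983'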
package/src/server/lib/bulk_insert_optimizer.js
ADDED
@@ -0,0 +1,559 @@
+ /**
+  * @fileoverview Bulk insert performance optimizer for JoystickDB.
+  *
+  * Provides enterprise-scale bulk insert capabilities with optimizations for:
+  * - Map size pre-allocation to prevent MDB_MAP_FULL errors
+  * - Size-based transaction batching for optimal performance
+  * - Key ordering and append mode for B-tree efficiency
+  * - Direct serialization to eliminate double encoding overhead
+  * - Safe index management with deferred rebuilding
+  * - Memory management and streaming processing
+  * - Concurrent read safety during bulk operations
+  */
+
+ import { get_database, generate_document_id, build_collection_key, check_and_grow_map_size } from './query_engine.js';
+ import { get_write_queue } from './write_queue.js';
+ import { get_auto_index_database, initialize_auto_index_database } from './auto_index_manager.js';
+ import create_logger from './logger.js';
+
+ const { create_context_logger } = create_logger('bulk_insert_optimizer');
+
+ /** @type {number} Optimal transaction size in bytes (100MB) */
+ const OPTIMAL_TRANSACTION_SIZE = 100 * 1024 * 1024;
+
+ /** @type {number} Default batch size for streaming processing */
+ const DEFAULT_STREAM_BATCH_SIZE = 1000;
+
+ /** @type {number} Progress logging interval */
+ const PROGRESS_LOG_INTERVAL = 10000;
+
+ /**
+  * Calculates the average document size from a sample.
+  * @param {Array<Object>} documents - Sample documents
+  * @param {number} [sample_size=100] - Number of documents to sample
+  * @returns {number} Average document size in bytes
+  */
+ const calculate_average_document_size = (documents, sample_size = 100) => {
+   const sample = documents.slice(0, Math.min(sample_size, documents.length));
+   const total_size = sample.reduce((sum, doc) => {
+     return sum + Buffer.byteLength(JSON.stringify(doc), 'utf8');
+   }, 0);
+
+   return Math.ceil(total_size / sample.length);
+ };
+
+ /**
+  * Calculates required map size for bulk insert operation.
+  * @param {number} document_count - Number of documents to insert
+  * @param {number} avg_document_size - Average document size in bytes
+  * @returns {number} Required map size in bytes
+  */
+ const calculate_bulk_map_size = (document_count, avg_document_size) => {
+   const estimated_size = document_count * avg_document_size;
+   const safety_factor = 2.0; // 100% overhead for indexes and growth
+   const minimum_size = 1024 * 1024 * 1024 * 10; // 10GB minimum
+
+   return Math.max(estimated_size * safety_factor, minimum_size);
+ };
+
+ /**
+  * Pre-allocates map size for bulk insert operation.
+  * @param {Array<Object>} documents - Documents to be inserted
+  * @returns {Promise<void>}
+  */
+ const prepare_bulk_insert_map_size = async (documents) => {
+   const log = create_context_logger();
+
+   if (documents.length === 0) {
+     return;
+   }
+
+   const avg_size = calculate_average_document_size(documents);
+   const required_map_size = calculate_bulk_map_size(documents.length, avg_size);
+
+   log.info('Pre-allocating map size for bulk insert', {
+     document_count: documents.length,
+     avg_document_size: avg_size,
+     required_map_size,
+     required_map_size_gb: Math.round(required_map_size / (1024 * 1024 * 1024) * 100) / 100
+   });
+
+   // Trigger map size growth check
+   await check_and_grow_map_size();
+
+   const db = get_database();
+   if (db.resize) {
+     try {
+       db.resize(required_map_size);
+       log.info('Map size pre-allocated successfully', {
+         new_map_size: required_map_size,
+         new_map_size_gb: Math.round(required_map_size / (1024 * 1024 * 1024) * 100) / 100
+       });
+     } catch (error) {
+       log.warn('Failed to pre-allocate map size', { error: error.message });
+     }
+   }
+ };
+
+ /**
+  * Creates size-based batches for optimal transaction performance.
+  * @param {Array<Object>} documents - Documents to batch
+  * @param {number} [target_size=OPTIMAL_TRANSACTION_SIZE] - Target batch size in bytes
+  * @returns {Array<Array<Object>>} Array of document batches
+  */
+ const create_size_based_batches = (documents, target_size = OPTIMAL_TRANSACTION_SIZE) => {
+   const batches = [];
+   let current_batch = [];
+   let current_size = 0;
+
+   for (const doc of documents) {
+     const doc_size = Buffer.byteLength(JSON.stringify(doc), 'utf8');
+
+     if (current_size + doc_size > target_size && current_batch.length > 0) {
+       batches.push(current_batch);
+       current_batch = [doc];
+       current_size = doc_size;
+     } else {
+       current_batch.push(doc);
+       current_size += doc_size;
+     }
+   }
+
+   if (current_batch.length > 0) {
+     batches.push(current_batch);
+   }
+
+   return batches;
+ };
+
+ /**
+  * Generates sequential document ID for optimal key ordering.
+  * @returns {string} Sequential document ID
+  */
+ const generate_sequential_id = (() => {
+   let counter = Date.now() * 1000; // Microsecond precision
+   return () => {
+     return (++counter).toString(36).padStart(12, '0');
+   };
+ })();
+
+ /**
+  * Sorts documents by key for optimal B-tree insertion.
+  * @param {Array<Object>} documents - Documents to sort
+  * @param {string} database_name - Database name
+  * @param {string} collection_name - Collection name
+  * @returns {Array<Object>} Sorted documents with assigned IDs
+  */
+ const sort_documents_by_key = (documents, database_name, collection_name) => {
+   return documents.map(doc => ({
+     ...doc,
+     _id: doc._id || generate_sequential_id()
+   })).sort((a, b) => {
+     const key_a = build_collection_key(database_name, collection_name, a._id);
+     const key_b = build_collection_key(database_name, collection_name, b._id);
+     return key_a.localeCompare(key_b);
+   });
+ };
+
+ /**
+  * Pre-encodes documents as Buffers for direct LMDB storage.
+  * @param {Array<Object>} documents - Documents to encode
+  * @param {string} database_name - Database name
+  * @param {string} collection_name - Collection name
+  * @returns {Array<Object>} Encoded document entries
+  */
+ const pre_encode_documents = (documents, database_name, collection_name) => {
+   const current_timestamp = new Date().toISOString();
+
+   return documents.map(doc => {
+     // Ensure document has an ID
+     const document_id = doc._id || generate_sequential_id();
+
+     const document_with_timestamps = {
+       ...doc,
+       _id: document_id,
+       _created_at: doc._created_at || current_timestamp,
+       _updated_at: doc._updated_at || current_timestamp
+     };
+
+     const json_string = JSON.stringify(document_with_timestamps);
+     const key = build_collection_key(database_name, collection_name, document_id);
+
+     return {
+       key,
+       value: json_string, // Store as string for LMDB msgpack encoding
+       document_id: document_id
+     };
+   });
+ };
+
+ /**
+  * Performs optimized bulk insert with pre-encoded documents.
+  * @param {Object} db - Database instance
+  * @param {Array<Object>} encoded_documents - Pre-encoded document entries
+  * @returns {Promise<Array<string>>} Array of inserted document IDs
+  */
+ const bulk_insert_pre_encoded = async (db, encoded_documents) => {
+   const inserted_ids = [];
+
+   await db.transaction(() => {
+     for (const { key, value, document_id } of encoded_documents) {
+       // Check if document already exists
+       const existing = db.get(key);
+       if (existing) {
+         throw new Error(`Document with _id ${document_id} already exists`);
+       }
+
+       db.put(key, value);
+       inserted_ids.push(document_id);
+     }
+   });
+
+   return inserted_ids;
+ };
+
+ /**
+  * Streaming bulk insert processor with aggressive memory management.
+  * @param {Array<Object>} documents - Documents to insert
+  * @param {string} database_name - Database name
+  * @param {string} collection_name - Collection name
+  * @param {number} [batch_size=DEFAULT_STREAM_BATCH_SIZE] - Streaming batch size
+  * @returns {AsyncGenerator<Array<string>>} Generator yielding inserted document IDs
+  */
+ const stream_bulk_insert = async function* (documents, database_name, collection_name, batch_size = DEFAULT_STREAM_BATCH_SIZE) {
+   const db = get_database();
+
+   for (let i = 0; i < documents.length; i += batch_size) {
+     const batch = documents.slice(i, i + batch_size);
+     const encoded_batch = pre_encode_documents(batch, database_name, collection_name);
+
+     const inserted_ids = await bulk_insert_pre_encoded(db, encoded_batch);
+     yield inserted_ids;
+
+     // Clear batch references immediately to help GC
+     batch.length = 0;
+     encoded_batch.length = 0;
+
+     const batch_number = Math.floor(i / batch_size);
+
+     // Ultra-aggressive memory management for very large datasets
+     if (documents.length >= 5000000) {
+       // For 5M+ documents, force GC every 5 batches with longer delays
+       if (batch_number % 5 === 0 && global.gc) {
+         global.gc();
+         await new Promise(resolve => setTimeout(resolve, 100));
+       }
+       // Always yield to event loop for very large datasets
+       await new Promise(resolve => setImmediate(resolve));
+     } else if (documents.length >= 1000000) {
+       // For 1M+ documents, force GC every 8 batches
+       if (batch_number % 8 === 0 && global.gc) {
+         global.gc();
+         await new Promise(resolve => setTimeout(resolve, 75));
+       }
+       // Yield every batch for large datasets
+       await new Promise(resolve => setImmediate(resolve));
+     } else if (documents.length > 100000) {
+       // For 100K-1M documents, force GC every 25 batches
+       if (batch_number % 25 === 0 && global.gc) {
+         global.gc();
+         await new Promise(resolve => setTimeout(resolve, 25));
+       }
+       // Yield every batch for medium datasets
+       await new Promise(resolve => setImmediate(resolve));
+     } else {
+       // For smaller datasets, yield every 10 batches as before
+       if (batch_number % 10 === 0) {
+         await new Promise(resolve => setImmediate(resolve));
+       }
+     }
+   }
+ };
+
+ /**
+  * Safely disables auto-indexing during bulk operations.
+  * @returns {boolean} Previous auto-indexing state
+  */
+ const disable_auto_indexing = () => {
+   // Auto-indexing management will be implemented in future versions
+   // For now, return false to indicate no auto-indexing was disabled
+   return false;
+ };
+
+ /**
+  * Re-enables auto-indexing after bulk operations.
+  * @param {boolean} was_enabled - Previous auto-indexing state
+  */
+ const restore_auto_indexing = (was_enabled) => {
+   // Auto-indexing management will be implemented in future versions
+   // For now, this is a no-op
+ };
+
+ /**
+  * Rebuilds collection indexes after bulk insert.
+  * @param {string} database_name - Database name
+  * @param {string} collection_name - Collection name
+  * @returns {Promise<void>}
+  */
+ const rebuild_collection_indexes = async (database_name, collection_name) => {
+   const log = create_context_logger();
+
+   // Index rebuilding will be implemented in future versions
+   // For now, this is a no-op
+   log.debug('Index rebuilding skipped (not implemented)', {
+     database: database_name,
+     collection: collection_name
+   });
+ };
+
+ /**
+  * Optimized bulk insert implementation with all performance optimizations.
+  * @param {string} database_name - Database name
+  * @param {string} collection_name - Collection name
+  * @param {Array<Object>} documents - Documents to insert
+  * @param {Object} [options={}] - Optimization options
+  * @returns {Promise<Object>} Bulk insert results with performance metrics
+  */
+ const bulk_insert_optimized = async (database_name, collection_name, documents, options = {}) => {
+   const {
+     disable_indexing = true,
+     pre_allocate_map_size = true,
+     sort_keys = true,
+     stream_processing = true,
+     batch_size = DEFAULT_STREAM_BATCH_SIZE
+   } = options;
+
+   const log = create_context_logger();
+   const start_time = Date.now();
+   const start_memory = process.memoryUsage();
+
+   // Validate parameters
+   if (!database_name || !collection_name) {
+     throw new Error('Database name and collection name are required');
+   }
+
+   if (!Array.isArray(documents) || documents.length === 0) {
+     throw new Error('Documents must be a non-empty array');
+   }
+
+   log.info('Starting optimized bulk insert', {
+     database: database_name,
+     collection: collection_name,
+     document_count: documents.length,
+     options
+   });
+
+   let auto_index_was_enabled = false;
+
+   try {
+     // Phase 1: Pre-allocate map size
+     if (pre_allocate_map_size) {
+       await prepare_bulk_insert_map_size(documents);
+     }
+
+     // Phase 2: Disable auto-indexing
+     if (disable_indexing) {
+       auto_index_was_enabled = disable_auto_indexing();
+     }
+
+     // Phase 3: Sort documents by key
+     let processed_documents = documents;
+     if (sort_keys) {
+       processed_documents = sort_documents_by_key(documents, database_name, collection_name);
+     }
+
+     // Phase 4: Process documents
+     const all_inserted_ids = [];
+     let processed_count = 0;
+
+     if (stream_processing) {
+       // Streaming processing for memory efficiency
+       for await (const inserted_ids of stream_bulk_insert(processed_documents, database_name, collection_name, batch_size)) {
+         all_inserted_ids.push(...inserted_ids);
+         processed_count += inserted_ids.length;
+
+         // Log progress
+         if (processed_count % PROGRESS_LOG_INTERVAL === 0) {
+           log.info('Bulk insert progress', {
+             processed: processed_count,
+             total: documents.length,
+             percentage: Math.round((processed_count / documents.length) * 100)
+           });
+         }
+       }
+     } else {
+       // Batch processing for smaller datasets
+       const batches = create_size_based_batches(processed_documents);
+       const db = get_database();
+
+       for (const batch of batches) {
+         const encoded_batch = pre_encode_documents(batch, database_name, collection_name);
+         const inserted_ids = await bulk_insert_pre_encoded(db, encoded_batch);
+         all_inserted_ids.push(...inserted_ids);
+         processed_count += inserted_ids.length;
+
+         // Log progress
+         if (processed_count % PROGRESS_LOG_INTERVAL === 0) {
+           log.info('Bulk insert progress', {
+             processed: processed_count,
+             total: documents.length,
+             percentage: Math.round((processed_count / documents.length) * 100)
+           });
+         }
+       }
+     }
+
+     // Phase 5: Rebuild indexes
+     if (disable_indexing) {
+       await rebuild_collection_indexes(database_name, collection_name);
+     }
+
+     const end_time = Date.now();
+     const end_memory = process.memoryUsage();
+
+     const performance_metrics = {
+       duration_ms: end_time - start_time,
+       documents_per_second: Math.round(documents.length / ((end_time - start_time) / 1000)),
+       memory_delta_mb: Math.round((end_memory.heapUsed - start_memory.heapUsed) / (1024 * 1024)),
+       peak_memory_mb: Math.round(end_memory.heapUsed / (1024 * 1024))
+     };
+
+     log.info('Optimized bulk insert completed', {
+       database: database_name,
+       collection: collection_name,
+       inserted_count: all_inserted_ids.length,
+       performance: performance_metrics
+     });
+
+     return {
+       acknowledged: true,
+       inserted_count: all_inserted_ids.length,
+       inserted_ids: all_inserted_ids,
+       performance: performance_metrics
+     };
+
+   } catch (error) {
+     log.error('Optimized bulk insert failed', {
+       database: database_name,
+       collection: collection_name,
+       error: error.message
+     });
+     throw error;
+   } finally {
+     // Always restore auto-indexing
+     if (disable_indexing) {
+       restore_auto_indexing(auto_index_was_enabled);
+     }
+   }
+ };
+
+ /**
+  * Non-blocking bulk insert that yields to allow concurrent reads.
+  * @param {string} database_name - Database name
+  * @param {string} collection_name - Collection name
+  * @param {Array<Object>} documents - Documents to insert
+  * @param {Object} [options={}] - Options
+  * @returns {Promise<Object>} Bulk insert results
+  */
+ const non_blocking_bulk_insert = async (database_name, collection_name, documents, options = {}) => {
+   const { chunk_size = 10000 } = options;
+
+   const all_results = {
+     acknowledged: true,
+     inserted_count: 0,
+     inserted_ids: [],
+     performance: {
+       duration_ms: 0,
+       documents_per_second: 0,
+       memory_delta_mb: 0,
+       peak_memory_mb: 0
+     }
+   };
+
+   const start_time = Date.now();
+
+   // Process in smaller chunks to ensure reads are never blocked
+   for (let i = 0; i < documents.length; i += chunk_size) {
+     const chunk = documents.slice(i, i + chunk_size);
+     const result = await bulk_insert_optimized(database_name, collection_name, chunk, options);
+
+     all_results.inserted_count += result.inserted_count;
+     all_results.inserted_ids.push(...result.inserted_ids);
+
+     // Brief yield to allow reads to proceed
+     await new Promise(resolve => setImmediate(resolve));
+   }
+
+   const end_time = Date.now();
+   all_results.performance.duration_ms = end_time - start_time;
+   all_results.performance.documents_per_second = Math.round(documents.length / ((end_time - start_time) / 1000));
+
+   return all_results;
+ };
+
+ /**
+  * Bulk insert with performance monitoring and metrics.
+  * @param {string} database_name - Database name
+  * @param {string} collection_name - Collection name
+  * @param {Array<Object>} documents - Documents to insert
+  * @param {Object} [options={}] - Options
+  * @returns {Promise<Object>} Bulk insert results with detailed metrics
+  */
+ const bulk_insert_with_metrics = async (database_name, collection_name, documents, options = {}) => {
+   const start_time = Date.now();
+   const start_memory = process.memoryUsage();
+
+   const result = await bulk_insert_optimized(database_name, collection_name, documents, options);
+
+   const end_time = Date.now();
+   const end_memory = process.memoryUsage();
+
+   return {
+     ...result,
+     performance: {
+       ...result.performance,
+       total_duration_ms: end_time - start_time,
+       memory_usage: {
+         start_heap_mb: Math.round(start_memory.heapUsed / (1024 * 1024)),
+         end_heap_mb: Math.round(end_memory.heapUsed / (1024 * 1024)),
+         delta_heap_mb: Math.round((end_memory.heapUsed - start_memory.heapUsed) / (1024 * 1024)),
+         peak_heap_mb: Math.round(end_memory.heapUsed / (1024 * 1024))
+       }
+     }
+   };
+ };
+
+ /**
+  * Main bulk insert function with write queue integration.
+  * @param {string} database_name - Database name
+  * @param {string} collection_name - Collection name
+  * @param {Array<Object>} documents - Documents to insert
+  * @param {Object} [options={}] - Options
+  * @returns {Promise<Object>} Bulk insert results
+  */
+ const bulk_insert = async (database_name, collection_name, documents, options = {}) => {
+   const write_queue = get_write_queue();
+   const operation_metadata = {
+     operation: 'bulk_insert_optimized',
+     database: database_name,
+     collection: collection_name,
+     document_count: documents.length
+   };
+
+   return await write_queue.enqueue_write_operation(
+     () => bulk_insert_optimized(database_name, collection_name, documents, options),
+     operation_metadata
+   );
+ };
+
+ export {
+   bulk_insert_optimized,
+   bulk_insert_with_metrics,
+   non_blocking_bulk_insert,
+   bulk_insert,
+   calculate_average_document_size,
+   calculate_bulk_map_size,
+   create_size_based_batches,
+   sort_documents_by_key,
+   pre_encode_documents
+ };
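Note: a minimal usage sketch of the exported API above, assuming the database has already been initialized (initialize_database) and the write queue is available; the database/collection names, document shape, counts, and option values are illustrative assumptions, not part of the diff:

// Sketch only; names, counts, and option values are assumptions.
import { bulk_insert, bulk_insert_with_metrics } from './bulk_insert_optimizer.js';

const documents = Array.from({ length: 50000 }, (_, index) => ({
  index,
  category: `category_${index % 100}`
}));

// Queue-aware entry point: routes through the write queue before inserting.
const result = await bulk_insert('app', 'events', documents, {
  sort_keys: true,
  stream_processing: true,
  batch_size: 1000
});
console.log(result.inserted_count, result.performance.documents_per_second);

// Direct call with extended memory metrics attached to the result.
const detailed = await bulk_insert_with_metrics('app', 'events_metrics', documents);
console.log(detailed.performance.memory_usage.delta_heap_mb);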