@joystick.js/db-canary 0.0.0-canary.2271 → 0.0.0-canary.2273

This diff shows the changes between publicly available package versions as they were published to their respective public registries, and is provided for informational purposes only.
@@ -1 +1 @@
- import S from"net";import{decode as k}from"msgpackr";import x from"./lib/op_types.js";import g from"./lib/safe_json_parse.js";import{load_settings as l,get_settings as m,get_port_configuration as p}from"./lib/load_settings.js";import{send_error as c}from"./lib/send_response.js";import{start_cluster as T}from"./cluster/index.js";import h from"./lib/logger.js";import{initialize_database as z,cleanup_database as O}from"./lib/query_engine.js";import{create_message_parser as I,encode_message as w}from"./lib/tcp_protocol.js";import{create_connection_manager as R}from"./lib/connection_manager.js";import{shutdown_write_queue as C}from"./lib/write_queue.js";import{setup_authentication as E,verify_password as q,get_client_ip as N,is_rate_limited as A,initialize_auth_manager as B,reset_auth_state as D}from"./lib/auth_manager.js";import{initialize_api_key_manager as $}from"./lib/api_key_manager.js";import{is_development_mode as y,display_development_startup_message as F,warn_undefined_node_env as J}from"./lib/development_mode.js";import{restore_backup as K,start_backup_schedule as P,stop_backup_schedule as G}from"./lib/backup_manager.js";import{initialize_replication_manager as M,shutdown_replication_manager as W}from"./lib/replication_manager.js";import{initialize_write_forwarder as H,shutdown_write_forwarder as U}from"./lib/write_forwarder.js";import{handle_database_operation as V,handle_admin_operation as Y,handle_ping_operation as j}from"./lib/operation_dispatcher.js";import{start_http_server as L,stop_http_server as Q}from"./lib/http_server.js";import{create_recovery_token as X,initialize_recovery_manager as v,reset_recovery_state as Z}from"./lib/recovery_manager.js";import{has_settings as ee}from"./lib/load_settings.js";const i=new Set;let s=null;const re=e=>e&&e.password,d=e=>({ok:0,error:e}),te=()=>({ok:1,version:"1.0.0",message:"Authentication successful"}),u=(e,r)=>{const t=w(r);e.write(t),e.end()},_=(e,r)=>{const t=w(r);e.write(t)},ne=async(e,r={})=>{if(!re(r)){const t=d("Authentication operation requires password to be set in data.");u(e,t);return}try{const t=N(e);if(A(t)){const a=d("Too many failed attempts. Please try again later.");u(e,a);return}if(!await q(r.password,t)){const a=d("Authentication failed");u(e,a);return}i.add(e.id);const o=te();_(e,o)}catch(t){const n=d(`Authentication error: ${t.message}`);u(e,n)}},oe=e=>({ok:1,password:e,message:"Authentication setup completed successfully. 
Save this password - it will not be shown again."}),ae=e=>({ok:0,error:`Setup error: ${e}`}),se=async(e,r={})=>{try{const t=E(),n=oe(t);_(e,n)}catch(t){const n=ae(t.message);_(e,n)}},ie=(e="")=>{if(!e)throw new Error("Must pass an op type for operation.");return x.includes(e)},ce=e=>g(e),_e=e=>{try{const r=k(e);return typeof r=="string"?g(r):r}catch{return null}},ar=e=>{try{return typeof e=="string"?ce(e):Buffer.isBuffer(e)?_e(e):e}catch{return null}},f=e=>y()?!0:i.has(e.id),pe=async(e,r)=>{if(e?.restore_from)try{r.info("Startup restore requested",{backup_filename:e.restore_from});const t=await K(e.restore_from);r.info("Startup restore completed",{backup_filename:e.restore_from,duration_ms:t.duration_ms});const n={...e};delete n.restore_from,process.env.JOYSTICK_DB_SETTINGS=JSON.stringify(n),l(),r.info("Removed restore_from from settings after successful restore")}catch(t){r.error("Startup restore failed",{backup_filename:e.restore_from,error:t.message}),r.info("Continuing with fresh database after restore failure")}},de=()=>{try{return l(),m()}catch{return null}},ue=async e=>{const r=e?.data_path||"./data";z(r),B(),await $(),v()},le=e=>{try{M(),e.info("Replication manager initialized")}catch(r){e.warn("Failed to initialize replication manager",{error:r.message})}},me=e=>{try{H(),e.info("Write forwarder initialized")}catch(r){e.warn("Failed to initialize write forwarder",{error:r.message})}},fe=(e,r)=>{if(e?.s3)try{P(),r.info("Backup scheduling started")}catch(t){r.warn("Failed to start backup scheduling",{error:t.message})}},ge=async(e,r)=>{try{const t=await L(e);return t&&r.info("HTTP server started",{http_port:e}),t}catch(t){return r.warn("Failed to start HTTP server",{error:t.message}),null}},he=()=>{if(y()){const{tcp_port:e,http_port:r}=p();F(e,r)}else J()},we=()=>R({max_connections:1e3,idle_timeout:600*1e3,request_timeout:5*1e3}),ye=async(e,r,t,n)=>{s.update_activity(e.id);try{const o=t.parse_messages(r);for(const a of o)await ve(e,a,r.length,n)}catch(o){n.error("Message parsing failed",{client_id:e.id,error:o.message}),c(e,{message:"Invalid message format"}),e.end()}},ve=async(e,r,t,n)=>{const o=r,a=o?.op||null;if(!a){c(e,{message:"Missing operation type"});return}if(!ie(a)){c(e,{message:"Invalid operation type"});return}const b=s.create_request_timeout(e.id,a);try{await be(e,a,o,t)}finally{clearTimeout(b)}},be=async(e,r,t,n)=>{const o=t?.data||{};switch(r){case"authentication":await ne(e,o);break;case"setup":await se(e,o);break;case"insert_one":case"update_one":case"delete_one":case"delete_many":case"bulk_write":case"find_one":case"find":case"count_documents":case"create_index":case"drop_index":case"get_indexes":await V(e,r,o,f,n,s,i);break;case"ping":j(e);break;case"admin":await Y(e,o,f,s,i);break;case"reload":await Se(e);break;default:c(e,{message:`Operation ${r} not implemented`})}},Se=async e=>{if(!f(e)){c(e,{message:"Authentication required"});return}try{const r=ke(),t=await xe(),n=Te(r,t);_(e,n)}catch(r){const t={ok:0,error:`Reload operation failed: ${r.message}`};_(e,t)}},ke=()=>{try{return m()}catch{return null}},xe=async()=>{try{return await l(),m()}catch{return{port:1983,authentication:{}}}},Te=(e,r)=>({ok:1,status:"success",message:"Configuration reloaded successfully",changes:{port_changed:e?e.port!==r.port:!1,authentication_changed:e?e.authentication?.password_hash!==r.authentication?.password_hash:!1},timestamp:new Date().toISOString()}),ze=(e,r)=>{r.info("Client disconnected",{socket_id:e.id}),i.delete(e.id),s.remove_connection(e.id)},Oe=(e,r,t)=>{t.error("Socket 
error",{socket_id:e.id,error:r.message}),i.delete(e.id),s.remove_connection(e.id)},Ie=(e,r,t)=>{e.on("data",async n=>{await ye(e,n,r,t)}),e.on("end",()=>{ze(e,t)}),e.on("error",n=>{Oe(e,n,t)})},Re=(e,r)=>{if(!s.add_connection(e))return;const t=I();Ie(e,t,r)},Ce=()=>async()=>{try{await Q(),G(),await W(),await U(),s&&s.shutdown(),i.clear(),await C(),await new Promise(e=>setTimeout(e,100)),await O(),D(),Z()}catch{}},sr=async()=>{const{create_context_logger:e}=h("server"),r=e(),t=de();await pe(t,r),await ue(t),le(r),me(r),fe(t,r),s=we();const{http_port:n}=p();await ge(n,r),he();const o=S.createServer((a={})=>{Re(a,r)});return o.cleanup=Ce(),o},Ee=e=>{try{v();const r=X();console.log("Emergency Recovery Token Generated"),console.log(`Visit: ${r.url}`),console.log("Token expires in 10 minutes"),e.info("Recovery token generated via CLI",{expires_at:new Date(r.expires_at).toISOString()}),process.exit(0)}catch(r){console.error("Failed to generate recovery token:",r.message),e.error("Recovery token generation failed",{error:r.message}),process.exit(1)}},qe=()=>{const{tcp_port:e}=p();return{worker_count:process.env.WORKER_COUNT?parseInt(process.env.WORKER_COUNT):void 0,port:e,environment:process.env.NODE_ENV||"development"}},Ne=(e,r)=>{const{tcp_port:t,http_port:n}=p(),o=ee();r.info("Starting JoystickDB server...",{workers:e.worker_count||"auto",tcp_port:t,http_port:n,environment:e.environment,has_settings:o,port_source:o?"JOYSTICK_DB_SETTINGS":"default"})};if(import.meta.url===`file://${process.argv[1]}`){const{create_context_logger:e}=h("main"),r=e();process.argv.includes("--generate-recovery-token")&&Ee(r);const t=qe();Ne(t,r),T(t)}export{ne as authentication,ie as check_op_type,sr as create_server,ar as parse_data,se as setup};
+ import S from"net";import{decode as k}from"msgpackr";import x from"./lib/op_types.js";import g from"./lib/safe_json_parse.js";import{load_settings as l,get_settings as m,get_port_configuration as c}from"./lib/load_settings.js";import{send_error as _}from"./lib/send_response.js";import{start_cluster as T}from"./cluster/index.js";import h from"./lib/logger.js";import{initialize_database as z,cleanup_database as O}from"./lib/query_engine.js";import{create_message_parser as I,encode_message as w}from"./lib/tcp_protocol.js";import{create_connection_manager as R}from"./lib/connection_manager.js";import{shutdown_write_queue as C}from"./lib/write_queue.js";import{setup_authentication as E,verify_password as q,get_client_ip as N,is_rate_limited as $,initialize_auth_manager as A,reset_auth_state as B}from"./lib/auth_manager.js";import{initialize_api_key_manager as D}from"./lib/api_key_manager.js";import{is_development_mode as y,display_development_startup_message as F,warn_undefined_node_env as J}from"./lib/development_mode.js";import{restore_backup as K,start_backup_schedule as P,stop_backup_schedule as j}from"./lib/backup_manager.js";import{initialize_replication_manager as G,shutdown_replication_manager as M}from"./lib/replication_manager.js";import{initialize_write_forwarder as W,shutdown_write_forwarder as H}from"./lib/write_forwarder.js";import{handle_database_operation as U,handle_admin_operation as V,handle_ping_operation as Y}from"./lib/operation_dispatcher.js";import{start_http_server as L,stop_http_server as Q}from"./lib/http_server.js";import{create_recovery_token as X,initialize_recovery_manager as v,reset_recovery_state as Z}from"./lib/recovery_manager.js";import{has_settings as ee}from"./lib/load_settings.js";const i=new Set;let s=null;const re=e=>e&&e.password,d=e=>({ok:0,error:e}),te=()=>({ok:1,version:"1.0.0",message:"Authentication successful"}),u=(e,r)=>{const t=w(r);e.write(t),e.end()},p=(e,r)=>{const t=w(r);e.write(t)},ne=async(e,r={})=>{if(!re(r)){const t=d("Authentication operation requires password to be set in data.");u(e,t);return}try{const t=N(e);if($(t)){const a=d("Too many failed attempts. Please try again later.");u(e,a);return}if(!await q(r.password,t)){const a=d("Authentication failed");u(e,a);return}i.add(e.id);const o=te();p(e,o)}catch(t){const n=d(`Authentication error: ${t.message}`);u(e,n)}},oe=e=>({ok:1,password:e,message:"Authentication setup completed successfully. 
Save this password - it will not be shown again."}),ae=e=>({ok:0,error:`Setup error: ${e}`}),se=async(e,r={})=>{try{const t=E(),n=oe(t);p(e,n)}catch(t){const n=ae(t.message);p(e,n)}},ie=(e="")=>{if(!e)throw new Error("Must pass an op type for operation.");return x.includes(e)},ce=e=>g(e),_e=e=>{try{const r=k(e);return typeof r=="string"?g(r):r}catch{return null}},ar=e=>{try{return typeof e=="string"?ce(e):Buffer.isBuffer(e)?_e(e):e}catch{return null}},f=e=>y()?!0:i.has(e.id),pe=async(e,r)=>{if(e?.restore_from)try{r.info("Startup restore requested",{backup_filename:e.restore_from});const t=await K(e.restore_from);r.info("Startup restore completed",{backup_filename:e.restore_from,duration_ms:t.duration_ms});const n={...e};delete n.restore_from,process.env.JOYSTICK_DB_SETTINGS=JSON.stringify(n),l(),r.info("Removed restore_from from settings after successful restore")}catch(t){r.error("Startup restore failed",{backup_filename:e.restore_from,error:t.message}),r.info("Continuing with fresh database after restore failure")}},de=()=>{try{return l(),m()}catch{return null}},ue=async e=>{const{tcp_port:r}=c(),t=e?.data_path||`./.joystick/data/joystickdb_${r}`;z(t),A(),await D(),v()},le=e=>{try{G(),e.info("Replication manager initialized")}catch(r){e.warn("Failed to initialize replication manager",{error:r.message})}},me=e=>{try{W(),e.info("Write forwarder initialized")}catch(r){e.warn("Failed to initialize write forwarder",{error:r.message})}},fe=(e,r)=>{if(e?.s3)try{P(),r.info("Backup scheduling started")}catch(t){r.warn("Failed to start backup scheduling",{error:t.message})}},ge=async(e,r)=>{try{const t=await L(e);return t&&r.info("HTTP server started",{http_port:e}),t}catch(t){return r.warn("Failed to start HTTP server",{error:t.message}),null}},he=()=>{if(y()){const{tcp_port:e,http_port:r}=c();F(e,r)}else J()},we=()=>R({max_connections:1e3,idle_timeout:600*1e3,request_timeout:5*1e3}),ye=async(e,r,t,n)=>{s.update_activity(e.id);try{const o=t.parse_messages(r);for(const a of o)await ve(e,a,r.length,n)}catch(o){n.error("Message parsing failed",{client_id:e.id,error:o.message}),_(e,{message:"Invalid message format"}),e.end()}},ve=async(e,r,t,n)=>{const o=r,a=o?.op||null;if(!a){_(e,{message:"Missing operation type"});return}if(!ie(a)){_(e,{message:"Invalid operation type"});return}const b=s.create_request_timeout(e.id,a);try{await be(e,a,o,t)}finally{clearTimeout(b)}},be=async(e,r,t,n)=>{const o=t?.data||{};switch(r){case"authentication":await ne(e,o);break;case"setup":await se(e,o);break;case"insert_one":case"update_one":case"delete_one":case"delete_many":case"bulk_write":case"find_one":case"find":case"count_documents":case"create_index":case"drop_index":case"get_indexes":await U(e,r,o,f,n,s,i);break;case"ping":Y(e);break;case"admin":await V(e,o,f,s,i);break;case"reload":await Se(e);break;default:_(e,{message:`Operation ${r} not implemented`})}},Se=async e=>{if(!f(e)){_(e,{message:"Authentication required"});return}try{const r=ke(),t=await xe(),n=Te(r,t);p(e,n)}catch(r){const t={ok:0,error:`Reload operation failed: ${r.message}`};p(e,t)}},ke=()=>{try{return m()}catch{return null}},xe=async()=>{try{return await l(),m()}catch{return{port:1983,authentication:{}}}},Te=(e,r)=>({ok:1,status:"success",message:"Configuration reloaded successfully",changes:{port_changed:e?e.port!==r.port:!1,authentication_changed:e?e.authentication?.password_hash!==r.authentication?.password_hash:!1},timestamp:new Date().toISOString()}),ze=(e,r)=>{r.info("Client 
disconnected",{socket_id:e.id}),i.delete(e.id),s.remove_connection(e.id)},Oe=(e,r,t)=>{t.error("Socket error",{socket_id:e.id,error:r.message}),i.delete(e.id),s.remove_connection(e.id)},Ie=(e,r,t)=>{e.on("data",async n=>{await ye(e,n,r,t)}),e.on("end",()=>{ze(e,t)}),e.on("error",n=>{Oe(e,n,t)})},Re=(e,r)=>{if(!s.add_connection(e))return;const t=I();Ie(e,t,r)},Ce=()=>async()=>{try{await Q(),j(),await M(),await H(),s&&s.shutdown(),i.clear(),await C(),await new Promise(e=>setTimeout(e,100)),await O(),B(),Z()}catch{}},sr=async()=>{const{create_context_logger:e}=h("server"),r=e(),t=de();await pe(t,r),await ue(t),le(r),me(r),fe(t,r),s=we();const{http_port:n}=c();await ge(n,r),he();const o=S.createServer((a={})=>{Re(a,r)});return o.cleanup=Ce(),o},Ee=e=>{try{v();const r=X();console.log("Emergency Recovery Token Generated"),console.log(`Visit: ${r.url}`),console.log("Token expires in 10 minutes"),e.info("Recovery token generated via CLI",{expires_at:new Date(r.expires_at).toISOString()}),process.exit(0)}catch(r){console.error("Failed to generate recovery token:",r.message),e.error("Recovery token generation failed",{error:r.message}),process.exit(1)}},qe=()=>{const{tcp_port:e}=c();return{worker_count:process.env.WORKER_COUNT?parseInt(process.env.WORKER_COUNT):void 0,port:e,environment:process.env.NODE_ENV||"development"}},Ne=(e,r)=>{const{tcp_port:t,http_port:n}=c(),o=ee();r.info("Starting JoystickDB server...",{workers:e.worker_count||"auto",tcp_port:t,http_port:n,environment:e.environment,has_settings:o,port_source:o?"JOYSTICK_DB_SETTINGS":"default"})};if(import.meta.url===`file://${process.argv[1]}`){const{create_context_logger:e}=h("main"),r=e();process.argv.includes("--generate-recovery-token")&&Ee(r);const t=qe();Ne(t,r),T(t)}export{ne as authentication,ie as check_op_type,sr as create_server,ar as parse_data,se as setup};
@@ -0,0 +1 @@
+ import{get_database as f,build_collection_key as b,check_and_grow_map_size as v}from"./query_engine.js";import{get_write_queue as B}from"./write_queue.js";import"./auto_index_manager.js";import R from"./logger.js";const{create_context_logger:w}=R("bulk_insert_optimizer"),C=100*1024*1024,U=1e3,D=1e4,I=(e,r=100)=>{const t=e.slice(0,Math.min(r,e.length)),n=t.reduce((o,s)=>o+Buffer.byteLength(JSON.stringify(s),"utf8"),0);return Math.ceil(n/t.length)},P=(e,r)=>{const t=e*r,n=2,o=1024*1024*1024*10;return Math.max(t*n,o)},J=async e=>{const r=w();if(e.length===0)return;const t=I(e),n=P(e.length,t);r.info("Pre-allocating map size for bulk insert",{document_count:e.length,avg_document_size:t,required_map_size:n,required_map_size_gb:Math.round(n/(1024*1024*1024)*100)/100}),await v();const o=f();if(o.resize)try{o.resize(n),r.info("Map size pre-allocated successfully",{new_map_size:n,new_map_size_gb:Math.round(n/(1024*1024*1024)*100)/100})}catch(s){r.warn("Failed to pre-allocate map size",{error:s.message})}},O=(e,r=C)=>{const t=[];let n=[],o=0;for(const s of e){const i=Buffer.byteLength(JSON.stringify(s),"utf8");o+i>r&&n.length>0?(t.push(n),n=[s],o=i):(n.push(s),o+=i)}return n.length>0&&t.push(n),t},T=(()=>{let e=Date.now()*1e3;return()=>(++e).toString(36).padStart(12,"0")})(),A=(e,r,t)=>e.map(n=>({...n,_id:n._id||T()})).sort((n,o)=>{const s=b(r,t,n._id),i=b(r,t,o._id);return s.localeCompare(i)}),y=(e,r,t)=>{const n=new Date().toISOString();return e.map(o=>{const s=o._id||T(),i={...o,_id:s,_created_at:o._created_at||n,_updated_at:o._updated_at||n},l=JSON.stringify(i);return{key:b(r,t,s),value:l,document_id:s}})},E=async(e,r)=>{const t=[];return await e.transaction(()=>{for(const{key:n,value:o,document_id:s}of r){if(e.get(n))throw new Error(`Document with _id ${s} already exists`);e.put(n,o),t.push(s)}}),t},F=async function*(e,r,t,n=U){const o=f();for(let s=0;s<e.length;s+=n){const i=e.slice(s,s+n),l=y(i,r,t);yield await E(o,l),i.length=0,l.length=0;const c=Math.floor(s/n);e.length>=5e6?(c%5===0&&global.gc&&(global.gc(),await new Promise(a=>setTimeout(a,100))),await new Promise(a=>setImmediate(a))):e.length>=1e6?(c%8===0&&global.gc&&(global.gc(),await new Promise(a=>setTimeout(a,75))),await new Promise(a=>setImmediate(a))):e.length>1e5?(c%25===0&&global.gc&&(global.gc(),await new Promise(a=>setTimeout(a,25))),await new Promise(a=>setImmediate(a))):c%10===0&&await new Promise(a=>setImmediate(a))}},G=()=>!1,Z=e=>{},j=async(e,r)=>{w().debug("Index rebuilding skipped (not implemented)",{database:e,collection:r})},p=async(e,r,t,n={})=>{const{disable_indexing:o=!0,pre_allocate_map_size:s=!0,sort_keys:i=!0,stream_processing:l=!0,batch_size:_=U}=n,c=w(),a=Date.now(),h=process.memoryUsage();if(!e||!r)throw new Error("Database name and collection name are required");if(!Array.isArray(t)||t.length===0)throw new Error("Documents must be a non-empty array");c.info("Starting optimized bulk insert",{database:e,collection:r,document_count:t.length,options:n});let k=!1;try{s&&await J(t),o&&(k=G());let u=t;i&&(u=A(t,e,r));const m=[];let d=0;if(l)for await(const g of F(u,e,r,_))m.push(...g),d+=g.length,d%D===0&&c.info("Bulk insert progress",{processed:d,total:t.length,percentage:Math.round(d/t.length*100)});else{const g=O(u),q=f();for(const L of g){const N=y(L,e,r),S=await E(q,N);m.push(...S),d+=S.length,d%D===0&&c.info("Bulk insert progress",{processed:d,total:t.length,percentage:Math.round(d/t.length*100)})}}o&&await j(e,r);const 
z=Date.now(),M=process.memoryUsage(),x={duration_ms:z-a,documents_per_second:Math.round(t.length/((z-a)/1e3)),memory_delta_mb:Math.round((M.heapUsed-h.heapUsed)/(1024*1024)),peak_memory_mb:Math.round(M.heapUsed/(1024*1024))};return c.info("Optimized bulk insert completed",{database:e,collection:r,inserted_count:m.length,performance:x}),{acknowledged:!0,inserted_count:m.length,inserted_ids:m,performance:x}}catch(u){throw c.error("Optimized bulk insert failed",{database:e,collection:r,error:u.message}),u}finally{o&&Z(k)}},H=async(e,r,t,n={})=>{const{chunk_size:o=1e4}=n,s={acknowledged:!0,inserted_count:0,inserted_ids:[],performance:{duration_ms:0,documents_per_second:0,memory_delta_mb:0,peak_memory_mb:0}},i=Date.now();for(let _=0;_<t.length;_+=o){const c=t.slice(_,_+o),a=await p(e,r,c,n);s.inserted_count+=a.inserted_count,s.inserted_ids.push(...a.inserted_ids),await new Promise(h=>setImmediate(h))}const l=Date.now();return s.performance.duration_ms=l-i,s.performance.documents_per_second=Math.round(t.length/((l-i)/1e3)),s},V=async(e,r,t,n={})=>{const o=Date.now(),s=process.memoryUsage(),i=await p(e,r,t,n),l=Date.now(),_=process.memoryUsage();return{...i,performance:{...i.performance,total_duration_ms:l-o,memory_usage:{start_heap_mb:Math.round(s.heapUsed/(1024*1024)),end_heap_mb:Math.round(_.heapUsed/(1024*1024)),delta_heap_mb:Math.round((_.heapUsed-s.heapUsed)/(1024*1024)),peak_heap_mb:Math.round(_.heapUsed/(1024*1024))}}}},$=async(e,r,t,n={})=>{const o=B(),s={operation:"bulk_insert_optimized",database:e,collection:r,document_count:t.length};return await o.enqueue_write_operation(()=>p(e,r,t,n),s)};export{$ as bulk_insert,p as bulk_insert_optimized,V as bulk_insert_with_metrics,I as calculate_average_document_size,P as calculate_bulk_map_size,O as create_size_based_batches,H as non_blocking_bulk_insert,y as pre_encode_documents,A as sort_documents_by_key};
@@ -0,0 +1 @@
+ import{bulk_insert_optimized as z}from"./bulk_insert_optimizer.js";import k from"./logger.js";const{create_context_logger:U}=k("memory_efficient_bulk_insert"),M=async function*(r,n={}){const{batch_size:a=1e3,document_template:c="minimal",test_id:i=Date.now().toString(36)}=n;for(let s=0;s<r;s+=a){const d=Math.min(a,r-s),l=[];for(let _=0;_<d;_++){const e=s+_;let o;c==="minimal"?o={_id:`mem_${i}_${e.toString().padStart(8,"0")}`,idx:e,cat:e%50,val:e%1e3}:c==="medium"?o={_id:`mem_${i}_${e.toString().padStart(8,"0")}`,name:`Document ${e}`,index:e,category:`category_${e%100}`,active:e%2===0,priority:e%5,score:Math.random()*100,created_timestamp:Date.now()+e}:c==="large"&&(o={_id:`mem_${i}_${e.toString().padStart(8,"0")}`,name:`Large Document ${e}`,index:e,category:`category_${e%100}`,subcategory:`subcategory_${e%20}`,active:e%2===0,priority:e%5,score:Math.random()*100,created_timestamp:Date.now()+e,description:`This is a large document with index ${e} for performance testing purposes.`,metadata:{created_by:`user_${e%1e3}`,department:`dept_${e%50}`,project:`project_${e%200}`,tags:[`tag_${e%10}`,`tag_${(e+1)%10}`]},measurements:Array.from({length:5},(f,p)=>({timestamp:Date.now()+e+p,value:Math.random()*1e3}))}),l.push(o)}yield l,await new Promise(_=>setImmediate(_))}},x=async(r,n,a,c={})=>{const{generation_batch_size:i=1e3,insert_batch_size:s=250,document_template:d="minimal",disable_indexing:l=!0,pre_allocate_map_size:_=!0,sort_keys:e=!0}=c,o=U(),f=Date.now(),p=process.memoryUsage();o.info("Starting memory-efficient bulk insert",{database:r,collection:n,document_count:a,generation_batch_size:i,insert_batch_size:s,document_template:d});const h=[];let b=0,m=0;try{for await(const w of M(a,{batch_size:i,document_template:d})){const $=await z(r,n,w,{disable_indexing:l,pre_allocate_map_size:m===0?_:!1,sort_keys:e,stream_processing:!0,batch_size:s});if(h.push(...$.inserted_ids),b+=$.inserted_count,m++,w.length=0,m%10===0){const t=process.memoryUsage();o.info("Memory-efficient bulk insert progress",{processed:b,total:a,percentage:Math.round(b/a*100),current_heap_mb:Math.round(t.heapUsed/(1024*1024)),batches_processed:m})}a>=1e7?(m%20===0&&global.gc&&(global.gc(),await new Promise(t=>setTimeout(t,25))),m%5===0&&await new Promise(t=>setImmediate(t))):a>=5e6?(m%10===0&&global.gc&&(global.gc(),await new Promise(t=>setTimeout(t,50))),m%2===0&&await new Promise(t=>setImmediate(t))):a>=1e6?(m%10===0&&global.gc&&(global.gc(),await new Promise(t=>setTimeout(t,50))),await new Promise(t=>setImmediate(t))):await new Promise(t=>setImmediate(t))}const g=Date.now(),u=process.memoryUsage(),y={duration_ms:g-f,documents_per_second:Math.round(a/((g-f)/1e3)),memory_usage:{start_heap_mb:Math.round(p.heapUsed/(1024*1024)),end_heap_mb:Math.round(u.heapUsed/(1024*1024)),delta_heap_mb:Math.round((u.heapUsed-p.heapUsed)/(1024*1024)),peak_heap_mb:Math.round(u.heapUsed/(1024*1024))}};return o.info("Memory-efficient bulk insert completed",{database:r,collection:n,inserted_count:h.length,performance:y}),{acknowledged:!0,inserted_count:h.length,inserted_ids:h,performance:y}}catch(g){throw o.error("Memory-efficient bulk insert failed",{database:r,collection:n,error:g.message}),g}},D=(r,n="minimal",a=1e3)=>{const c={minimal:50,medium:200,large:500},i=c[n]||c.minimal,s=Math.round(a*i/(1024*1024)),d=Math.round(r*i/(1024*1024)),l=s*3+100;return{avg_document_size_bytes:i,total_data_size_mb:d,batch_memory_mb:s,estimated_peak_memory_mb:l,recommended_batch_size:r>=1e7?2e3:r>=5e6?1e3:r>=1e6?750:1e3}};export{D as estimate_memory_usage,M as 
generate_documents_streaming,x as memory_efficient_bulk_insert};
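The bundle above exports memory_efficient_bulk_insert, generate_documents_streaming, and estimate_memory_usage. A minimal usage sketch, reconstructed from the minified signatures; the module path, database/collection names, and option values are assumptions for illustration, and the sketch presumes the query engine has already been initialized:

import { memory_efficient_bulk_insert, estimate_memory_usage } from './lib/memory_efficient_bulk_insert.js';

// Rough sizing for 1,000,000 synthetic "medium" template documents.
const estimate = estimate_memory_usage(1_000_000, 'medium', 1_000);
console.log(estimate.recommended_batch_size, estimate.estimated_peak_memory_mb);

// Generate and insert the synthetic documents in streaming batches.
const result = await memory_efficient_bulk_insert('app', 'events', 1_000_000, {
  generation_batch_size: 1_000,
  insert_batch_size: 250,
  document_template: 'medium'
});
console.log(result.inserted_count, result.performance.documents_per_second);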
package/package.json CHANGED
@@ -1,16 +1,21 @@
  {
  "name": "@joystick.js/db-canary",
  "type": "module",
- "version": "0.0.0-canary.2271",
- "canary_version": "0.0.0-canary.2270",
+ "version": "0.0.0-canary.2273",
+ "canary_version": "0.0.0-canary.2272",
  "description": "JoystickDB - A minimalist database server for the Joystick framework",
  "main": "./dist/server/index.js",
  "scripts": {
  "build": "node ./.build/index.js",
  "release": "node increment_version.js && npm run build && npm publish",
  "start": "node src/server/index.js",
- "test": "NODE_ENV=test ava --serial",
+ "test": "NODE_ENV=test NODE_OPTIONS='--expose-gc --max-old-space-size=8192' ava --serial --timeout=10m",
  "test:watch": "NODE_ENV=test ava --watch",
+ "test:performance": "NODE_ENV=test NODE_OPTIONS='--expose-gc --max-old-space-size=16384' ava --serial --timeout=30m tests/performance/*.test.js",
+ "test:enterprise": "NODE_ENV=test NODE_OPTIONS='--expose-gc --max-old-space-size=16384' ava --serial --timeout=30m tests/performance/bulk_insert_enterprise_*.test.js",
+ "test:benchmarks": "NODE_ENV=test NODE_OPTIONS='--expose-gc --max-old-space-size=16384' ava --serial --timeout=30m tests/performance/bulk_insert_benchmarks.test.js",
+ "test:1m": "NODE_ENV=test NODE_OPTIONS='--expose-gc --max-old-space-size=8192' ava --serial --timeout=15m tests/performance/bulk_insert_1m_test.js",
+ "test:runner": "node test_runner.js",
  "build:types": "tsc --declaration --emitDeclarationOnly --allowJs --outDir types src/**/*.js",
  "build:types:client": "tsc --declaration --emitDeclarationOnly --allowJs --outDir types/client src/client/*.js",
  "build:types:server": "tsc --declaration --emitDeclarationOnly --allowJs --outDir types/server src/server/**/*.js"
@@ -33,7 +38,8 @@
  },
  "ava": {
  "files": [
- "tests/**/*.test.js"
+ "tests/**/*.test.js",
+ "!tests/performance/*.test.js"
  ],
  "verbose": true
  },
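Note that the updated ava "files" entry excludes tests/performance/*.test.js from the default test run; those suites appear to be intended for explicit invocation via the new scripts, for example:

npm run test:performance
npm run test:1m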
@@ -315,7 +315,9 @@ const load_server_settings = () => {
  * @param {Object} settings - Server settings
  */
  const initialize_server_components = async (settings) => {
- const database_path = settings?.data_path || './data';
+ // NOTE: Use proper .joystick/data path with port number as fallback, matching other database providers
+ const { tcp_port } = get_port_configuration();
+ const database_path = settings?.data_path || `./.joystick/data/joystickdb_${tcp_port}`;
 
  initialize_database(database_path);
  initialize_auth_manager();
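As a concrete example (assuming the default TCP port of 1983 used by the bundled fallback settings), a server started without an explicit data_path now stores its data under ./.joystick/data/joystickdb_1983 rather than ./data.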
@@ -0,0 +1,559 @@
1
+ /**
2
+ * @fileoverview Bulk insert performance optimizer for JoystickDB.
3
+ *
4
+ * Provides enterprise-scale bulk insert capabilities with optimizations for:
5
+ * - Map size pre-allocation to prevent MDB_MAP_FULL errors
6
+ * - Size-based transaction batching for optimal performance
7
+ * - Key ordering and append mode for B-tree efficiency
8
+ * - Direct serialization to eliminate double encoding overhead
9
+ * - Safe index management with deferred rebuilding
10
+ * - Memory management and streaming processing
11
+ * - Concurrent read safety during bulk operations
12
+ */
13
+
14
+ import { get_database, generate_document_id, build_collection_key, check_and_grow_map_size } from './query_engine.js';
15
+ import { get_write_queue } from './write_queue.js';
16
+ import { get_auto_index_database, initialize_auto_index_database } from './auto_index_manager.js';
17
+ import create_logger from './logger.js';
18
+
19
+ const { create_context_logger } = create_logger('bulk_insert_optimizer');
20
+
21
+ /** @type {number} Optimal transaction size in bytes (100MB) */
22
+ const OPTIMAL_TRANSACTION_SIZE = 100 * 1024 * 1024;
23
+
24
+ /** @type {number} Default batch size for streaming processing */
25
+ const DEFAULT_STREAM_BATCH_SIZE = 1000;
26
+
27
+ /** @type {number} Progress logging interval */
28
+ const PROGRESS_LOG_INTERVAL = 10000;
29
+
30
+ /**
31
+ * Calculates the average document size from a sample.
32
+ * @param {Array<Object>} documents - Sample documents
33
+ * @param {number} [sample_size=100] - Number of documents to sample
34
+ * @returns {number} Average document size in bytes
35
+ */
36
+ const calculate_average_document_size = (documents, sample_size = 100) => {
37
+ const sample = documents.slice(0, Math.min(sample_size, documents.length));
38
+ const total_size = sample.reduce((sum, doc) => {
39
+ return sum + Buffer.byteLength(JSON.stringify(doc), 'utf8');
40
+ }, 0);
41
+
42
+ return Math.ceil(total_size / sample.length);
43
+ };
44
+
45
+ /**
46
+ * Calculates required map size for bulk insert operation.
47
+ * @param {number} document_count - Number of documents to insert
48
+ * @param {number} avg_document_size - Average document size in bytes
49
+ * @returns {number} Required map size in bytes
50
+ */
51
+ const calculate_bulk_map_size = (document_count, avg_document_size) => {
52
+ const estimated_size = document_count * avg_document_size;
53
+ const safety_factor = 2.0; // 100% overhead for indexes and growth
54
+ const minimum_size = 1024 * 1024 * 1024 * 10; // 10GB minimum
55
+
56
+ return Math.max(estimated_size * safety_factor, minimum_size);
57
+ };
58
+
59
+ /**
60
+ * Pre-allocates map size for bulk insert operation.
61
+ * @param {Array<Object>} documents - Documents to be inserted
62
+ * @returns {Promise<void>}
63
+ */
64
+ const prepare_bulk_insert_map_size = async (documents) => {
65
+ const log = create_context_logger();
66
+
67
+ if (documents.length === 0) {
68
+ return;
69
+ }
70
+
71
+ const avg_size = calculate_average_document_size(documents);
72
+ const required_map_size = calculate_bulk_map_size(documents.length, avg_size);
73
+
74
+ log.info('Pre-allocating map size for bulk insert', {
75
+ document_count: documents.length,
76
+ avg_document_size: avg_size,
77
+ required_map_size,
78
+ required_map_size_gb: Math.round(required_map_size / (1024 * 1024 * 1024) * 100) / 100
79
+ });
80
+
81
+ // Trigger map size growth check
82
+ await check_and_grow_map_size();
83
+
84
+ const db = get_database();
85
+ if (db.resize) {
86
+ try {
87
+ db.resize(required_map_size);
88
+ log.info('Map size pre-allocated successfully', {
89
+ new_map_size: required_map_size,
90
+ new_map_size_gb: Math.round(required_map_size / (1024 * 1024 * 1024) * 100) / 100
91
+ });
92
+ } catch (error) {
93
+ log.warn('Failed to pre-allocate map size', { error: error.message });
94
+ }
95
+ }
96
+ };
97
+
98
+ /**
99
+ * Creates size-based batches for optimal transaction performance.
100
+ * @param {Array<Object>} documents - Documents to batch
101
+ * @param {number} [target_size=OPTIMAL_TRANSACTION_SIZE] - Target batch size in bytes
102
+ * @returns {Array<Array<Object>>} Array of document batches
103
+ */
104
+ const create_size_based_batches = (documents, target_size = OPTIMAL_TRANSACTION_SIZE) => {
105
+ const batches = [];
106
+ let current_batch = [];
107
+ let current_size = 0;
108
+
109
+ for (const doc of documents) {
110
+ const doc_size = Buffer.byteLength(JSON.stringify(doc), 'utf8');
111
+
112
+ if (current_size + doc_size > target_size && current_batch.length > 0) {
113
+ batches.push(current_batch);
114
+ current_batch = [doc];
115
+ current_size = doc_size;
116
+ } else {
117
+ current_batch.push(doc);
118
+ current_size += doc_size;
119
+ }
120
+ }
121
+
122
+ if (current_batch.length > 0) {
123
+ batches.push(current_batch);
124
+ }
125
+
126
+ return batches;
127
+ };
128
+
129
+ /**
130
+ * Generates sequential document ID for optimal key ordering.
131
+ * @returns {string} Sequential document ID
132
+ */
133
+ const generate_sequential_id = (() => {
134
+ let counter = Date.now() * 1000; // Microsecond precision
135
+ return () => {
136
+ return (++counter).toString(36).padStart(12, '0');
137
+ };
138
+ })();
139
+
140
+ /**
141
+ * Sorts documents by key for optimal B-tree insertion.
142
+ * @param {Array<Object>} documents - Documents to sort
143
+ * @param {string} database_name - Database name
144
+ * @param {string} collection_name - Collection name
145
+ * @returns {Array<Object>} Sorted documents with assigned IDs
146
+ */
147
+ const sort_documents_by_key = (documents, database_name, collection_name) => {
148
+ return documents.map(doc => ({
149
+ ...doc,
150
+ _id: doc._id || generate_sequential_id()
151
+ })).sort((a, b) => {
152
+ const key_a = build_collection_key(database_name, collection_name, a._id);
153
+ const key_b = build_collection_key(database_name, collection_name, b._id);
154
+ return key_a.localeCompare(key_b);
155
+ });
156
+ };
157
+
158
+ /**
159
+ * Pre-encodes documents as Buffers for direct LMDB storage.
160
+ * @param {Array<Object>} documents - Documents to encode
161
+ * @param {string} database_name - Database name
162
+ * @param {string} collection_name - Collection name
163
+ * @returns {Array<Object>} Encoded document entries
164
+ */
165
+ const pre_encode_documents = (documents, database_name, collection_name) => {
166
+ const current_timestamp = new Date().toISOString();
167
+
168
+ return documents.map(doc => {
169
+ // Ensure document has an ID
170
+ const document_id = doc._id || generate_sequential_id();
171
+
172
+ const document_with_timestamps = {
173
+ ...doc,
174
+ _id: document_id,
175
+ _created_at: doc._created_at || current_timestamp,
176
+ _updated_at: doc._updated_at || current_timestamp
177
+ };
178
+
179
+ const json_string = JSON.stringify(document_with_timestamps);
180
+ const key = build_collection_key(database_name, collection_name, document_id);
181
+
182
+ return {
183
+ key,
184
+ value: json_string, // Store as string for LMDB msgpack encoding
185
+ document_id: document_id
186
+ };
187
+ });
188
+ };
189
+
190
+ /**
191
+ * Performs optimized bulk insert with pre-encoded documents.
192
+ * @param {Object} db - Database instance
193
+ * @param {Array<Object>} encoded_documents - Pre-encoded document entries
194
+ * @returns {Promise<Array<string>>} Array of inserted document IDs
195
+ */
196
+ const bulk_insert_pre_encoded = async (db, encoded_documents) => {
197
+ const inserted_ids = [];
198
+
199
+ await db.transaction(() => {
200
+ for (const { key, value, document_id } of encoded_documents) {
201
+ // Check if document already exists
202
+ const existing = db.get(key);
203
+ if (existing) {
204
+ throw new Error(`Document with _id ${document_id} already exists`);
205
+ }
206
+
207
+ db.put(key, value);
208
+ inserted_ids.push(document_id);
209
+ }
210
+ });
211
+
212
+ return inserted_ids;
213
+ };
214
+
215
+ /**
216
+ * Streaming bulk insert processor with aggressive memory management.
217
+ * @param {Array<Object>} documents - Documents to insert
218
+ * @param {string} database_name - Database name
219
+ * @param {string} collection_name - Collection name
220
+ * @param {number} [batch_size=DEFAULT_STREAM_BATCH_SIZE] - Streaming batch size
221
+ * @returns {AsyncGenerator<Array<string>>} Generator yielding inserted document IDs
222
+ */
223
+ const stream_bulk_insert = async function* (documents, database_name, collection_name, batch_size = DEFAULT_STREAM_BATCH_SIZE) {
224
+ const db = get_database();
225
+
226
+ for (let i = 0; i < documents.length; i += batch_size) {
227
+ const batch = documents.slice(i, i + batch_size);
228
+ const encoded_batch = pre_encode_documents(batch, database_name, collection_name);
229
+
230
+ const inserted_ids = await bulk_insert_pre_encoded(db, encoded_batch);
231
+ yield inserted_ids;
232
+
233
+ // Clear batch references immediately to help GC
234
+ batch.length = 0;
235
+ encoded_batch.length = 0;
236
+
237
+ const batch_number = Math.floor(i / batch_size);
238
+
239
+ // Ultra-aggressive memory management for very large datasets
240
+ if (documents.length >= 5000000) {
241
+ // For 5M+ documents, force GC every 5 batches with longer delays
242
+ if (batch_number % 5 === 0 && global.gc) {
243
+ global.gc();
244
+ await new Promise(resolve => setTimeout(resolve, 100));
245
+ }
246
+ // Always yield to event loop for very large datasets
247
+ await new Promise(resolve => setImmediate(resolve));
248
+ } else if (documents.length >= 1000000) {
249
+ // For 1M+ documents, force GC every 8 batches
250
+ if (batch_number % 8 === 0 && global.gc) {
251
+ global.gc();
252
+ await new Promise(resolve => setTimeout(resolve, 75));
253
+ }
254
+ // Yield every batch for large datasets
255
+ await new Promise(resolve => setImmediate(resolve));
256
+ } else if (documents.length > 100000) {
257
+ // For 100K-1M documents, force GC every 25 batches
258
+ if (batch_number % 25 === 0 && global.gc) {
259
+ global.gc();
260
+ await new Promise(resolve => setTimeout(resolve, 25));
261
+ }
262
+ // Yield every batch for medium datasets
263
+ await new Promise(resolve => setImmediate(resolve));
264
+ } else {
265
+ // For smaller datasets, yield every 10 batches as before
266
+ if (batch_number % 10 === 0) {
267
+ await new Promise(resolve => setImmediate(resolve));
268
+ }
269
+ }
270
+ }
271
+ };
272
+
273
+ /**
274
+ * Safely disables auto-indexing during bulk operations.
275
+ * @returns {boolean} Previous auto-indexing state
276
+ */
277
+ const disable_auto_indexing = () => {
278
+ // Auto-indexing management will be implemented in future versions
279
+ // For now, return false to indicate no auto-indexing was disabled
280
+ return false;
281
+ };
282
+
283
+ /**
284
+ * Re-enables auto-indexing after bulk operations.
285
+ * @param {boolean} was_enabled - Previous auto-indexing state
286
+ */
287
+ const restore_auto_indexing = (was_enabled) => {
288
+ // Auto-indexing management will be implemented in future versions
289
+ // For now, this is a no-op
290
+ };
291
+
292
+ /**
293
+ * Rebuilds collection indexes after bulk insert.
294
+ * @param {string} database_name - Database name
295
+ * @param {string} collection_name - Collection name
296
+ * @returns {Promise<void>}
297
+ */
298
+ const rebuild_collection_indexes = async (database_name, collection_name) => {
299
+ const log = create_context_logger();
300
+
301
+ // Index rebuilding will be implemented in future versions
302
+ // For now, this is a no-op
303
+ log.debug('Index rebuilding skipped (not implemented)', {
304
+ database: database_name,
305
+ collection: collection_name
306
+ });
307
+ };
308
+
309
+ /**
310
+ * Optimized bulk insert implementation with all performance optimizations.
311
+ * @param {string} database_name - Database name
312
+ * @param {string} collection_name - Collection name
313
+ * @param {Array<Object>} documents - Documents to insert
314
+ * @param {Object} [options={}] - Optimization options
315
+ * @returns {Promise<Object>} Bulk insert results with performance metrics
316
+ */
317
+ const bulk_insert_optimized = async (database_name, collection_name, documents, options = {}) => {
318
+ const {
319
+ disable_indexing = true,
320
+ pre_allocate_map_size = true,
321
+ sort_keys = true,
322
+ stream_processing = true,
323
+ batch_size = DEFAULT_STREAM_BATCH_SIZE
324
+ } = options;
325
+
326
+ const log = create_context_logger();
327
+ const start_time = Date.now();
328
+ const start_memory = process.memoryUsage();
329
+
330
+ // Validate parameters
331
+ if (!database_name || !collection_name) {
332
+ throw new Error('Database name and collection name are required');
333
+ }
334
+
335
+ if (!Array.isArray(documents) || documents.length === 0) {
336
+ throw new Error('Documents must be a non-empty array');
337
+ }
338
+
339
+ log.info('Starting optimized bulk insert', {
340
+ database: database_name,
341
+ collection: collection_name,
342
+ document_count: documents.length,
343
+ options
344
+ });
345
+
346
+ let auto_index_was_enabled = false;
347
+
348
+ try {
349
+ // Phase 1: Pre-allocate map size
350
+ if (pre_allocate_map_size) {
351
+ await prepare_bulk_insert_map_size(documents);
352
+ }
353
+
354
+ // Phase 2: Disable auto-indexing
355
+ if (disable_indexing) {
356
+ auto_index_was_enabled = disable_auto_indexing();
357
+ }
358
+
359
+ // Phase 3: Sort documents by key
360
+ let processed_documents = documents;
361
+ if (sort_keys) {
362
+ processed_documents = sort_documents_by_key(documents, database_name, collection_name);
363
+ }
364
+
365
+ // Phase 4: Process documents
366
+ const all_inserted_ids = [];
367
+ let processed_count = 0;
368
+
369
+ if (stream_processing) {
370
+ // Streaming processing for memory efficiency
371
+ for await (const inserted_ids of stream_bulk_insert(processed_documents, database_name, collection_name, batch_size)) {
372
+ all_inserted_ids.push(...inserted_ids);
373
+ processed_count += inserted_ids.length;
374
+
375
+ // Log progress
376
+ if (processed_count % PROGRESS_LOG_INTERVAL === 0) {
377
+ log.info('Bulk insert progress', {
378
+ processed: processed_count,
379
+ total: documents.length,
380
+ percentage: Math.round((processed_count / documents.length) * 100)
381
+ });
382
+ }
383
+ }
384
+ } else {
385
+ // Batch processing for smaller datasets
386
+ const batches = create_size_based_batches(processed_documents);
387
+ const db = get_database();
388
+
389
+ for (const batch of batches) {
390
+ const encoded_batch = pre_encode_documents(batch, database_name, collection_name);
391
+ const inserted_ids = await bulk_insert_pre_encoded(db, encoded_batch);
392
+ all_inserted_ids.push(...inserted_ids);
393
+ processed_count += inserted_ids.length;
394
+
395
+ // Log progress
396
+ if (processed_count % PROGRESS_LOG_INTERVAL === 0) {
397
+ log.info('Bulk insert progress', {
398
+ processed: processed_count,
399
+ total: documents.length,
400
+ percentage: Math.round((processed_count / documents.length) * 100)
401
+ });
402
+ }
403
+ }
404
+ }
405
+
406
+ // Phase 5: Rebuild indexes
407
+ if (disable_indexing) {
408
+ await rebuild_collection_indexes(database_name, collection_name);
409
+ }
410
+
411
+ const end_time = Date.now();
412
+ const end_memory = process.memoryUsage();
413
+
414
+ const performance_metrics = {
415
+ duration_ms: end_time - start_time,
416
+ documents_per_second: Math.round(documents.length / ((end_time - start_time) / 1000)),
417
+ memory_delta_mb: Math.round((end_memory.heapUsed - start_memory.heapUsed) / (1024 * 1024)),
418
+ peak_memory_mb: Math.round(end_memory.heapUsed / (1024 * 1024))
419
+ };
420
+
421
+ log.info('Optimized bulk insert completed', {
422
+ database: database_name,
423
+ collection: collection_name,
424
+ inserted_count: all_inserted_ids.length,
425
+ performance: performance_metrics
426
+ });
427
+
428
+ return {
429
+ acknowledged: true,
430
+ inserted_count: all_inserted_ids.length,
431
+ inserted_ids: all_inserted_ids,
432
+ performance: performance_metrics
433
+ };
434
+
435
+ } catch (error) {
436
+ log.error('Optimized bulk insert failed', {
437
+ database: database_name,
438
+ collection: collection_name,
439
+ error: error.message
440
+ });
441
+ throw error;
442
+ } finally {
443
+ // Always restore auto-indexing
444
+ if (disable_indexing) {
445
+ restore_auto_indexing(auto_index_was_enabled);
446
+ }
447
+ }
448
+ };
449
+
450
+ /**
451
+ * Non-blocking bulk insert that yields to allow concurrent reads.
452
+ * @param {string} database_name - Database name
453
+ * @param {string} collection_name - Collection name
454
+ * @param {Array<Object>} documents - Documents to insert
455
+ * @param {Object} [options={}] - Options
456
+ * @returns {Promise<Object>} Bulk insert results
457
+ */
458
+ const non_blocking_bulk_insert = async (database_name, collection_name, documents, options = {}) => {
459
+ const { chunk_size = 10000 } = options;
460
+
461
+ const all_results = {
462
+ acknowledged: true,
463
+ inserted_count: 0,
464
+ inserted_ids: [],
465
+ performance: {
466
+ duration_ms: 0,
467
+ documents_per_second: 0,
468
+ memory_delta_mb: 0,
469
+ peak_memory_mb: 0
470
+ }
471
+ };
472
+
473
+ const start_time = Date.now();
474
+
475
+ // Process in smaller chunks to ensure reads are never blocked
476
+ for (let i = 0; i < documents.length; i += chunk_size) {
477
+ const chunk = documents.slice(i, i + chunk_size);
478
+ const result = await bulk_insert_optimized(database_name, collection_name, chunk, options);
479
+
480
+ all_results.inserted_count += result.inserted_count;
481
+ all_results.inserted_ids.push(...result.inserted_ids);
482
+
483
+ // Brief yield to allow reads to proceed
484
+ await new Promise(resolve => setImmediate(resolve));
485
+ }
486
+
487
+ const end_time = Date.now();
488
+ all_results.performance.duration_ms = end_time - start_time;
489
+ all_results.performance.documents_per_second = Math.round(documents.length / ((end_time - start_time) / 1000));
490
+
491
+ return all_results;
492
+ };
493
+
494
+ /**
495
+ * Bulk insert with performance monitoring and metrics.
496
+ * @param {string} database_name - Database name
497
+ * @param {string} collection_name - Collection name
498
+ * @param {Array<Object>} documents - Documents to insert
499
+ * @param {Object} [options={}] - Options
500
+ * @returns {Promise<Object>} Bulk insert results with detailed metrics
501
+ */
502
+ const bulk_insert_with_metrics = async (database_name, collection_name, documents, options = {}) => {
503
+ const start_time = Date.now();
504
+ const start_memory = process.memoryUsage();
505
+
506
+ const result = await bulk_insert_optimized(database_name, collection_name, documents, options);
507
+
508
+ const end_time = Date.now();
509
+ const end_memory = process.memoryUsage();
510
+
511
+ return {
512
+ ...result,
513
+ performance: {
514
+ ...result.performance,
515
+ total_duration_ms: end_time - start_time,
516
+ memory_usage: {
517
+ start_heap_mb: Math.round(start_memory.heapUsed / (1024 * 1024)),
518
+ end_heap_mb: Math.round(end_memory.heapUsed / (1024 * 1024)),
519
+ delta_heap_mb: Math.round((end_memory.heapUsed - start_memory.heapUsed) / (1024 * 1024)),
520
+ peak_heap_mb: Math.round(end_memory.heapUsed / (1024 * 1024))
521
+ }
522
+ }
523
+ };
524
+ };
525
+
526
+ /**
527
+ * Main bulk insert function with write queue integration.
528
+ * @param {string} database_name - Database name
529
+ * @param {string} collection_name - Collection name
530
+ * @param {Array<Object>} documents - Documents to insert
531
+ * @param {Object} [options={}] - Options
532
+ * @returns {Promise<Object>} Bulk insert results
533
+ */
534
+ const bulk_insert = async (database_name, collection_name, documents, options = {}) => {
535
+ const write_queue = get_write_queue();
536
+ const operation_metadata = {
537
+ operation: 'bulk_insert_optimized',
538
+ database: database_name,
539
+ collection: collection_name,
540
+ document_count: documents.length
541
+ };
542
+
543
+ return await write_queue.enqueue_write_operation(
544
+ () => bulk_insert_optimized(database_name, collection_name, documents, options),
545
+ operation_metadata
546
+ );
547
+ };
548
+
549
+ export {
550
+ bulk_insert_optimized,
551
+ bulk_insert_with_metrics,
552
+ non_blocking_bulk_insert,
553
+ bulk_insert,
554
+ calculate_average_document_size,
555
+ calculate_bulk_map_size,
556
+ create_size_based_batches,
557
+ sort_documents_by_key,
558
+ pre_encode_documents
559
+ };
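A minimal usage sketch for the new optimizer, based on the signatures and JSDoc above. The import path, database/collection names, and option values are assumptions for illustration, and the sketch presumes the query engine and write queue have already been initialized by the server:

import { bulk_insert } from './lib/bulk_insert_optimizer.js';

// Illustrative payload: 50,000 small documents without pre-assigned _id values.
const documents = Array.from({ length: 50_000 }, (_, index) => ({
  index,
  category: `category_${index % 100}`
}));

// Routed through the shared write queue; the options fall through to bulk_insert_optimized.
const result = await bulk_insert('app', 'events', documents, {
  pre_allocate_map_size: true,
  sort_keys: true,
  stream_processing: true,
  batch_size: 1000
});

console.log(result.inserted_count, result.performance.documents_per_second);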